Access body of PUT or POST request using FastCGI in C - c

I'm facing some issues trying to get my body content in my monkey server.
In fact, I don't really know how to access my data using fastcgi library functions.
I'm sending with Postman a PUT request on http://my.server.address:myport/cgi/something
with the following JSON content:
{ "hello" : "bonjour" }
On the server side
running on the main thread :
int32_t SWEB_traiter_requette_f(int32_t i32_socket_listen) {
FCGX_Request st_request;
(void)FCGX_InitRequest(&st_request, i32_socket_listen, 0);
if (FCGX_Accept_r(&st_request) == 0) {
ULOG_log_f(ULOG_PRIO_INFO, "accepting request");
(void)pthread_mutex_lock(gpu_mutex_s_web);
traiter_requette_s_web_f(&st_request,FCGX_GetParam("REQUEST_URI", st_request.envp));
(void)pthread_mutex_unlock(gpu_mutex_s_web);
(void)FCGX_Finish_r(&st_request);
}
FCGX_Free(&st_request,i32_socket_listen);
return 0;
}
And this is how I handle the request :
static void traiter_requette_s_web_f(FCGX_Request *pst_request,const char * pc_url) {
size_t z_len;
char_t *pc_data;
int i = 0;
int ch;
char_t *sz_content_len = FCGX_GetParam("CONTENT_LENGTH" , pst_request->envp);
char_t *sz_method = FCGX_GetParam("REQUEST_METHOD" , pst_request->envp);
char_t *sz_contenttype = FCGX_GetParam("CONTENT_TYPE" , pst_request->envp);
if (sz_contenttype != NULL){
/* first method ..... not working */
ch = FCGX_GetChar(pst_request->in);
while(ch != -1){
i++;
z_len = strtol(sz_content_len, NULL, 10);
pc_data = calloc(1,z_len+1);
if (pc_data == NULL){
//LCOV_EXCL_START
ULOG_log_f(ULOG_PRIO_ERREUR, "Erreur d'allocation de psz_data");
return;
//LCOV_EXCL_STOP
}
pc_data[i-1] = (char_t) ch;
ch = FCGX_GetChar(pst_request->in);
if (ch == -1 )
{
pc_data=(char*)realloc(pc_data,(i + 1)*sizeof(char));
pc_data[i] = '\0';
}
}
printf("data !! : %s\n",pc_data);
/* second method .... not working */
z_len = strtol(sz_content_len, NULL, 10);
pc_data = calloc(1,z_len+1);
if (pc_data == NULL){
//LCOV_EXCL_START
ULOG_log_f(ULOG_PRIO_ERREUR, "Erreur d'allocation de psz_data");
return;
//LCOV_EXCL_STOP
}
(void)FCGX_GetStr(pc_data,z_len,pst_request->in);
printf("data !! : %s\n",pc_data);
}
}
Maybe I'm doing something wrong with pc_data and this is not how to access the body.
How can I access the body of my request?

It seems that my code was leaking.
This is how you get the body content on a POST request :
static void traiter_requette_s_web_f(FCGX_Request *pst_request,const char * pc_url) {
size_t z_len;
int i = 0;
int ch;
char_t *sz_content_len = FCGX_GetParam("CONTENT_LENGTH" , pst_request->envp);
char_t *sz_method = FCGX_GetParam("REQUEST_METHOD" , pst_request->envp);
char_t *sz_contenttype = FCGX_GetParam("CONTENT_TYPE" , pst_request->envp);
char_t *pc_data = FCGX_GetParam("REQUEST_URI", pst_request->envp);
if (sz_contenttype != NULL)
{
z_len = strtol(sz_content_len, NULL, 10);
pc_data = calloc(1,z_len+1);
if (pc_data == NULL){
return;
}
ch = FCGX_GetChar(pst_request->in);
while(ch != -1){
i++;
pc_data[i-1] = (char_t) ch;
ch = FCGX_GetChar(pst_request->in);
if (ch == -1 )
{
pc_data=(char*)realloc(pc_data,(i + 1)*sizeof(char));
pc_data[i] = '\0';
}
}
printf("data !! : %s\n",pc_data);
}
}

According to https://fossies.org/dox/FCGI-0.78/ you can read body content on a POST request
#define DATA_READ_CHUNK 81912
void read_payload(size_t content_length, std::string&str) {
size_t read_length = 0;
size_t len = 0;
//DATA_READ_CHUNK should be 8192 according to FCGX_Accept_r(FCGX_Request *reqDataPtr) line 2154 file fcgiapp.c
while (true) {
char* buff;
if (content_length > DATA_READ_CHUNK) {
buff = (char*)malloc(DATA_READ_CHUNK + 1);
len = DATA_READ_CHUNK;
}
else {
buff = (char*)malloc(content_length + 1);
len = content_length;
}
buff[len] = '\0';
std::cin.read(buff, len);
str.append(buff, len);
free(buff);
content_length -= len;
if (content_length <= 0)break;
}
fclose(stdin);
}

Related

Why gdb showed /stdlib/strtol_l.c: No such file or directory? Do I missing something to install?

I tried to compile with -g and then run gdb to find the line that caused the segmentation fault, but the error message confused me.
Program received signal SIGSEGV, Segmentation fault.
__GI_____strtol_l_internal (nptr=0x0, endptr=endptr#entry=0x0, base=base#entry=10, group=group#entry=0, loc=0x7ffff7fb04a0 <_nl_global_locale>)
at ../stdlib/strtol_l.c:292
292 ../stdlib/strtol_l.c: No such file or directory.
I tried reinstalling gdb to get it working again, but I failed. It still shows the same error message. I later found the problem myself and marked it in the code below. I'm just curious why something like this sometimes happens when I try to debug some string functions? Like strdup, strtok, strtol, etc.. Am I missing something to install? I hope I can solve this problem completely.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
char buff[255];
#define NUM_BUCKETS 32
typedef struct Customer {
char* email;
char* name;
int shoesize;
char* food;
struct Customer* next;
} Customer ;
unsigned long hash(char *str) {
unsigned long hash = 0;
int c;
while (*str != '\0') {
c = *str;
hash = ((hash << 5) + hash) + (unsigned char)c;
str++;
}
return hash;
}
Customer *add_friend_to_list(char *email, char *name, int shoesize, char *food, Customer *bucket) {
Customer* customer;
customer = malloc(sizeof(Customer));
customer->name = strdup(name);
customer->food = strdup(food);
customer->shoesize = shoesize;
customer->email = strdup(email);
customer->next = bucket;
return customer;
}
void add_consumer_to_hashtable(char *name, char *food, char *email, int shoesize, Customer **buckets, size_t num_buckets) {
size_t which_bucket = hash(name) % num_buckets;
buckets[which_bucket] = add_friend_to_list(email, name, shoesize, food, buckets[which_bucket]);
}
int main() {
Customer* buckets[NUM_BUCKETS] = {NULL};
int ittime = 0;
FILE *fp = NULL;
fp = fopen("customers.tsv", "r");
while (true) {
fgets(buff, 255, fp);
if (feof(fp)) {
break;
}
ittime++;
}
fclose(fp);
fp = NULL;
char *email = (char *)malloc(5 * sizeof(char));
char *name = (char *)malloc(5 * sizeof(char));
int shoesize;
char *food = (char *)malloc(5 * sizeof(char));
const char s[2] = "\t";
fp = fopen("customers.tsv", "r");
for (int i = 0; i < ittime + 1; i++) { //This line cause the Segmentation Fault
fgets(buff, 255, fp);
char *token;
token = strtok(buff, s);
email = token;
token = strtok(NULL, s);
name = token;
token = strtok(NULL, s);
shoesize = atoi(token);
token = strtok(NULL, s);
food = token;
add_consumer_to_hashtable(name, food, email, shoesize, buckets, NUM_BUCKETS);
}
fclose(fp);
while (true) {
char *cmd = (char *)malloc(5 * sizeof(char));
printf("command: ");
scanf("%s", cmd);
if (strcmp(cmd, "add") == 0) {
char *email1 = (char *)malloc(5 * sizeof(char));
char *name1 = (char *)malloc(5 * sizeof(char));
int shoesize1;
char *food1 = (char *)malloc(5 * sizeof(char));
printf("email address? ");
scanf("%s", email1);
printf("name? ");
scanf(" %[^\n]", name1);
printf("shoe size? ");
scanf("%d", &shoesize1);
printf("favorite food? ");
scanf("%s", food1);
add_consumer_to_hashtable(name1, food1, email1, shoesize1, buckets, NUM_BUCKETS);
free(name1);
free(food1);
free(email1);
} else if (strcmp(cmd, "lookup") == 0) {
char *Email = (char *)malloc(5 * sizeof(char));
printf("email address? ");
scanf("%s", Email);
bool exist = false;
for (int i = 0; i < 32; i++) {
Customer *cus = buckets[i];
if (buckets[i] == NULL) {
continue;
}
while ((cus != NULL)) {
if (cus->shoesize == EOF) {
break;
}
if (strcmp(cus->email, Email) == 0) {
printf("email: %s\n", cus->email);
printf("name: %s\n", cus->name);
printf("shoesize: %d\n", cus->shoesize);
printf("food: %s\n", cus->food);
exist = true;
break;
}
if (cus->next != NULL) {
cus = cus->next;
} else {
break;
}
}
}
if (exist == false) {
printf("user not found!\n");
}
} else if (strcmp(cmd, "delete") == 0) {
char *Email = (char *)malloc(5 * sizeof(char));
printf("email address? ");
scanf("%s", Email);
bool exist = false;
for (int i = 0; i < 32; i++) {
Customer *cus = buckets[i];
if (buckets[i] == NULL) {
continue;
}
while ((cus != NULL)) {
if (cus->shoesize == EOF) {
break;
}
if (strcmp(cus->email, Email) == 0) {
free(cus->email);
free(cus->food);
free(cus->name);
free(cus);
cus->shoesize = EOF;
cus = NULL;
exist = true;
break;
}
if (cus->next != NULL) {
cus = cus->next;
} else {
break;
}
}
}
if (exist == false) {
printf("user not found!\n");
}
} else if (strcmp(cmd, "list") == 0) {
for (int i = 0; i < 32; i++) {
Customer *cus = buckets[i];
if (buckets[i] == NULL) {
continue;
}
while ((cus != NULL) && ((cus->shoesize) != EOF)) {
printf("email: %s\n", cus->email);
printf("name: %s\n", cus->name);
printf("shoesize: %d\n", cus->shoesize);
printf("food: %s\n", cus->food);
if (cus->next != NULL) {
cus = cus->next;
printf("\n");
} else {
break;
}
}
}
} else if (strcmp(cmd, "quit") == 0) {
break;
} else if (strcmp(cmd, "save") == 0) {
fp = fopen("customers.tsv", "w");
for (int i = 0; i < 32; i++) {
Customer *cus = buckets[i];
if (buckets[i] == NULL) {
continue;
}
while ((cus != NULL) && ((cus->shoesize) != EOF)) {
fprintf(fp, "%s\t%s\t%d\t%s", cus->email, cus->name, cus->shoesize, cus->food);
if (cus->next != NULL) {
cus = cus->next;
fprintf(fp, "\n");
} else {
break;
}
}
}
fclose(fp);
} else {
printf("unknown command\n");
}
}
for (int i = 0; i < 32; i++) {
Customer *tmp;
Customer *cus = buckets[i];
if (cus == NULL) {
continue;
}
if (cus->next != NULL) {
tmp = cus;
cus = cus->next;
} else {
break;
}
while ((tmp != NULL)) {
if (tmp->shoesize != EOF) {
free(tmp->email);
free(tmp->food);
free(tmp->name);
free(tmp);
}
cus->shoesize = EOF;
cus = NULL;
}
if (tmp != NULL) {
free(tmp);
}
if (cus != NULL) {
free(cus);
}
}
return 0;
}
I tried to compile with -g and then run gdb to find the line that caused the segmentation fault, but the error message confused me.
The error message means:
crash happened inside GLIBC strtol_l_internal() function
GDB can't show you the source of that function because libc6-src (or similar) package is not installed.
Now, looking at the source for strtol_l_internal() is not going to be helpful -- the root cause of the problem is that you called it with incorrect parameter.
You should read man strtol and verify that you satisfied its preconditions.
It looks like you called strtol(NULL, NULL, ...), which is not a valid thing to do. You could use (gdb) up command to find out where the wrong call came from, and fix the caller.

How to Parse an AT command response and one among the fields from the output in C

Im trying to capture the data from the AT command response but im unable to do so.
My Approach.
functions():
#define MAX_LINE_LENGTH (8 * 1024)
static char buf[MAX_LINE_LENGTH];
static char buf2[MAX_LINE_LENGTH];
static bool tr_lf_cr(const char *s)
{
char *p;
p = strchr(s, '\n');
if (p == NULL || p[1] != '\0') {
return false;
}
*p = '\r';
return true;
}
static void strip_cr(char *s)
{
char *from, *to;
from = to = s;
while (*from != '\0') {
if (*from == '\r') {
from++;
continue;
}
*to++ = *from++;
}
*to = '\0';
}
#define STARTS_WITH(a, b) ( strncmp((a), (b), strlen(b)) == 0)
main()
fd = fopen(*mp, "r+b");
if (fd == NULL) {
/* Could not open the port. */
perror("open_port: Unable to open /dev/ttyUSB0\n");
}
char str = '\n';
strncat(cmd, &str, 1);
success = tr_lf_cr(cmd);
if (! success) {
fprintf(stderr, "invalid string: '%s'\n", cmd);
return EXIT_FAILURE;
}
int res = fputs(cmd, fd);
if (res < 0) {
fprintf(stderr, "failed to send '%s' to modem (res = %d)\n", cmd, res);
return EXIT_FAILURE;
}
do {
line = fgets(buf, (int)sizeof(buf), fd);
if (line == NULL) {
fprintf(stderr, "EOF from modem\n");
return EXIT_FAILURE;
}
strcpy(buf2, line);
strip_cr(buf2);
char delim[] = ",";
char *ptr = strtok(buf2, delim);
printf("\n0++++++++++++++++++++\n");
while (ptr != NULL) {
printf("'%s'\n", ptr);
ptr = strtok(NULL, delim);
}
printf("\n1********************\n");
} while (STARTS_WITH(line, "OK") == 0);
I get the following output when i run this command AT^HCSQ?
0++++++++++++++++++++
'AT^HCSQ?
'
0++++++++++++++++++++
'^HCSQ: "WCDMA"'
'64'
'64'
'60'
'0
'
0++++++++++++++++++++
'
'
0++++++++++++++++++++
'OK
'
What i want to achieve is to store each value separately from the buffer like
char p = WCDMA;
int rssi = atoi[ptr];
[etc...]
Im using strtok() to achieve this but im unable to skip the empty lines i get from the response. What should i do here?

realloc : corrupted data returned

I'm trying to read from a file using C and after shrinking the size using realloc I get corrupted data. I don't really see what the problem could be.
Here's the function that returns the string :
char *read_string(FILE *fichier) {
char car = 0;
size_t size = 1;
char *symbole = realloc(NULL, sizeof(char) * size);
char *s;
size_t len = 0;
if (!symbole)
return symbole;
else
s = symbole;
do {
car = getc(fichier);
} while (car != '"' && car != EOF);
if (car == EOF)
return EOFP;
else {
car = getc(fichier);
while (car != '"' ) {
s[len] = car;
car = getc(fichier);
len++;
if (len == size) {
symbole = realloc(s, sizeof(char) * (size += 1));
if (!symbole)
return symbole;
else
s = symbole;
}
}
s[len] = '\0' ;
symbole = realloc(s, sizeof(char) * len);
if (!symbole) {
printf("WTF");
return symbole;
} else
s = symbole;
return s;
}
}
My main function is:
int main(int argc, char *argv[]) {
FILE *fichier = NULL;
fichier = fopen("C:/Users/Nabila K/Documents/test.json", "r");
if ((fichier != NULL)) {
while (feof(fichier) == 0) {
char *test = read_string(fichier);
if (test == NULL) {
printf("test == NULL\n");
exit(1);
} else
if (test == EOFP) {
} else {
printf("%s\n", test);
free(test);
}
}
fclose(fichier);
} else {
exit(EXIT_FAILURE);
}
return 0;
}
UPDATE
My json file looks something like this :
{
"KARIM BENNI" : {
"2017-08-07 09:50:50" : {
"Anomalie" : {
"description" : "Test",
"theme" : "Engins mobiles"
},
"date" : "2017-08-07",
"date_now" : "2017-08-07 09:50:50",
"entite" : "USINE LAMINAGE A FROID",
"etat" : "Cree",
"nb_personne" : 2,
"temps" : 5,
"visiteur" : "KARIM BENNI",
"visite" : "AHMED RABII",
"zone" : "COUPE"
}
}
}
There are multiple issues in your code:
char car = 0; is incorrect: you must define car as int to correctly distinguish all values returned by getc(), especially EOF.
while (feof(fichier) == 0) is always wrong. Learn why there: Why is “while ( !feof (file) )” always wrong?
EOFP is not defined, you should probably use NULL instead for more clarity.
the final realloc() to shrink the allocated block is one byte too short. You must keep len+1 bytes for len characters plus the null terminator.
Here is a simplified and corrected version:
#include <stdio.h>
#include <stdlib.h>
char EOFP[1]; /* special value used to signal end of file */
char *read_string(FILE *file) {
int c;
size_t size, len;
char *symbol;
char *s;
while ((c = getc(file)) != '"') {
if (c == EOF)
return EOFP;
}
size = 16;
len = 0;
symbol = malloc(size);
if (symbol == NULL) {
/* allocation failure */
return NULL;
}
while ((c = getc(file)) != '"') {
if (c == EOF) {
/* premature end of file in the middle of a string */
free(symbol);
return EOFP;
}
if (len + 2 < size) {
size += size;
s = realloc(symbol, size);
if (s == NULL) {
/* allocation failure */
free(symbol);
return NULL;
}
symbol = s;
}
symbol[len++] = c;
}
symbol[len] = '\0';
s = realloc(symbol, len + 1);
return s ? s : symbol;
}
int main(int argc, char *argv[]) {
FILE *file = fopen("C:/Users/Nabila K/Documents/test.json", "r");
if (file != NULL)) {
char *test;
while ((test = read_string(file)) != EOFP) {
if (test == NULL) {
printf("test == NULL\n");
exit(1);
} else {
printf("%s\n", test);
free(test);
}
}
fclose(file);
} else {
exit(EXIT_FAILURE);
}
return 0;
}
Notes:
Parsing the full JSON syntax for strings would be required if the strings can contain escaped characters such as \" or \n, \\ etc.

Fix Buffer Overflow Exploit on Web Server

I have a buffer overflow vulnerability in a simple webserver. It can be exploited with a http GET request. I'm having trouble figuring out how to fix it. My guess is that it has to do with: char hdrval[1024]; but I could be wrong. Can anyone else see whats wrong?
Code:
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <netdb.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <time.h>
#include <pthread.h>
#define _XOPEN_SOURCE
typedef struct {
char *method;
char *uri;
char *version;
char *headers;
} httpreq_t;
/* NOTE: this function is based on a function provided in the GNU "timegm" man
page. timegm is a GNU extension to time.h that returns the given tm struct as
a UNIX timestamp in GMT/UTC, rather than local time. The man page suggests a
function similar to the one below as a portable equivalent.
*/
time_t my_timegm(struct tm *tm) {
time_t ret;
char *tz;
tz = getenv("TZ");
putenv("TZ=GMT");
tzset();
ret = mktime(tm);
if (tz) {
char envstr[strlen(tz) + 4];
envstr[0] = '\0';
strcat(envstr, "TZ=");
strcat(envstr, tz);
putenv(envstr);
} else {
putenv("TZ=");
}
tzset();
return ret;
}
char *get_header(const httpreq_t *req, const char* headername) {
char *hdrptr;
char *hdrend;
char *retval = NULL;
char searchstr[strlen(headername) + 5];
strcpy(searchstr, "\r\n");
strcat(searchstr, headername);
strcat(searchstr, ": ");
if (hdrptr = strstr(req->headers, searchstr)) {
hdrptr += strlen(searchstr);
if (hdrend = strstr(hdrptr, "\r\n")) {
char hdrval[1024]; // temporary return value
memcpy((char *)hdrval, hdrptr, (hdrend - hdrptr));
hdrval[hdrend - hdrptr] = '\0'; // tack null onto end of header value
int hdrvallen = strlen(hdrval);
retval = (char *)malloc((hdrvallen + 1) * sizeof(char)); // malloc a space for retval
strcpy(retval, (char *)hdrval);
} else {
retval = (char *)malloc((strlen(hdrptr) + 1) * sizeof(char)); //
strcpy(retval, hdrptr);
}
}
return retval;
}
/* As long as str begins with a proper HTTP-Version followed by delim, returns a
pointer to the start of the version number (e.g., 1.0). Returns NULL otherwise.
*/
char *http_version_str(char *str, char *delim) {
char *vstart = strstr(str, "HTTP/");
char *vnumstart = str + 5;
char *vdot = strchr(str, '.');
char *vend = strstr(str, delim);
char *digits = "0123456789";
int majvlen = 0;
int minvlen = 0;
if (!vstart || !vdot // something's missing
|| vstart != str) // str doesn't start with "HTTP/"
return NULL;
majvlen = strspn(vnumstart, digits);
minvlen = strspn(vdot + 1, digits);
if (majvlen < 1 || (vnumstart + majvlen) != vdot // bad major version
|| minvlen < 1 || (vdot + minvlen + 1) != vend) // bad minor version
return NULL;
return vnumstart;
}
/* Fills req with the request data from datastr. Returns 0 on success.
*/
int parsereq(httpreq_t *req, char *datastr) {
char *position;
char *last_position = datastr;
char *temp_position;
int matchlen;
req->method = "";
req->uri = "";
req->version = "";
req->headers = "";
if (!(position = strchr(last_position, ' '))) {
return 1;
}
matchlen = (int)(position - last_position);
req->method = (char *)malloc((matchlen + 1) * sizeof(char));
memcpy(req->method, last_position, matchlen);
req->method[matchlen] = '\0';
last_position = position + 1;
if (!(position = strchr(last_position, ' '))
&& !(position = strstr(last_position, "\r\n"))) {
return 1;
}
// strip any query string out of the URI
if ((temp_position = strchr(last_position, '?')) && temp_position < position)
matchlen = (int)(temp_position - last_position);
else
matchlen = (int)(position - last_position);
req->uri = (char *)malloc((matchlen + 1) * sizeof(char));
memcpy(req->uri, last_position, matchlen);
req->uri[matchlen] = '\0';
if (position[0] == '\r') {
req->version = "0.9";
req->headers = "";
return 0; // simple req -- uri only
}
// If we get here, it's a full request, get the HTTP version and headers
last_position = position + 1;
if (!(position = strstr(last_position, "\r\n"))
|| !(last_position = http_version_str(last_position, "\r\n"))) {
return 1;
}
matchlen = (int)(position - last_position);
req->version = (char *)malloc((matchlen + 1) * sizeof(char));
memcpy(req->version, last_position, matchlen);
req->version[matchlen] = '\0';
last_position = position;
req->headers = (char *)malloc(strlen(last_position) * sizeof(char));
strcpy(req->headers, last_position);
return 0;
}
char *contype(char *ext) {
if (strcmp(ext, "html") == 0) return "text/html";
else if (strcmp(ext, "htm") == 0) return "text/html";
else if (strcmp(ext, "jpeg") == 0) return "image/jpeg";
else if (strcmp(ext, "jpg") == 0) return "image/jpeg";
else if (strcmp(ext, "gif") == 0) return "image/gif";
else if (strcmp(ext, "txt") == 0) return "text/plain";
else return "application/octet-stream";
}
char *status(int statcode) {
if (statcode == 200) return "200 OK";
else if (statcode == 304) return "304 Not Modified";
else if (statcode == 400) return "400 Bad Request";
else if (statcode == 403) return "403 Forbidden";
else if (statcode == 404) return "404 Not Found";
else if (statcode == 500) return "500 Internal Server Error";
else if (statcode == 501) return "501 Not Implemented";
else return "";
}
int send_response(int sockfd, httpreq_t *req, int statcode) {
int urifd;
const int BUFSIZE = 1024;
char sendmessage[BUFSIZE];
char *path = req->uri;
if (req->uri == NULL || req->method == NULL ||
req->headers == NULL || req->version == NULL) {
return 0;
}
if ((path[0] == '/') || ((strstr(path, "http://") == path)
&& (path = strchr(path + 7, '/')))) {
path += 1; // remove leading slash
if (path[0] == '\0') { // substituting in index.html for a blank URL!
path = "index.html";
} else if (path[strlen(path) - 1] == '/') {
//concatenating index.html for a /-terminated URL!
strcat(path, "index.html");
}
} else {
statcode = 400;
}
if (statcode == 200 && (urifd = open(path, O_RDONLY, 0)) < 0) {
if (errno == ENOENT || errno == ENOTDIR) { // file or directory doesn't exist
statcode = 404;
} else if (errno == EACCES) { // access denied
statcode = 403;
} else {
// some other file access problem
statcode = 500;
}
}
if (strstr(path, "..") != NULL) {
statcode = 500;
}
sendmessage[0] = '\0';
if (strcmp(req->version, "0.9") != 0) { // full request
char *ext; // file extension
time_t curtime;
char *imstime;
struct tm tm;
struct stat stbuf;
if (statcode == 200) {
if (ext = strrchr(path, '.')) ext++; // skip the '.'
else ext = "";
} else {
// errors are always html messages
ext = "html";
}
// Conditional GET
if ((strcmp(req->method, "GET") == 0)
&& (statcode == 200)
&& (imstime = get_header(req, "If-Modified-Since"))) {
// Get statistics about the requested URI from the local filesystem
if (stat(path, &stbuf) == -1) {
statcode = 500;
}
if (!strptime(imstime, "%a, %d %b %Y %H:%M:%S GMT", &tm)
&& !strptime(imstime, "%a, %d-%b-%y %H:%M:%S GMT", &tm)
&& !strptime(imstime, "%a %b %d %H:%M:%S %Y", &tm)) {
// badly formatted date
statcode = 400;
}
if (stbuf.st_mtime <= my_timegm(&tm)) {
// Not Modified
statcode = 304;
}
}
time(&curtime); // time for Date: header
strcat(sendmessage, "HTTP/1.0 ");
strcat(sendmessage, status(statcode));
strcat(sendmessage, "\r\nDate: ");
strncat(sendmessage, asctime(gmtime(&curtime)), 24);
strcat(sendmessage, "\r\nServer: Frobozz Magic Software Company Webserver v.002");
strcat(sendmessage, "\r\nConnection: close");
strcat(sendmessage, "\r\nContent-Type: ");
strcat(sendmessage, contype(ext));
strcat(sendmessage, "\r\n\r\n");
}
if (statcode != 200) {
strcat(sendmessage, "<html><head><title>");
strcat(sendmessage, status(statcode));
strcat(sendmessage, "</title></head><body><h2>HTTP/1.0</h2><h1>");
strcat(sendmessage, status(statcode));
strcat(sendmessage, "</h1><h2>URI: ");
strcat(sendmessage, path);
strcat(sendmessage, "</h2></body></html>");
}
if (sendmessage[0] != '\0') {
// send headers as long as there are headers to send
if (send(sockfd, sendmessage, strlen(sendmessage), 0) < 0) {
perror("send");
pthread_exit(NULL);
}
}
if (statcode == 200 && (strcmp(req->method, "HEAD") != 0)) {
// send the requested file as long as there's no error and the
// request wasn't just for the headers
int readbytes;
while (readbytes = read(urifd, sendmessage, BUFSIZE)) {
if (readbytes < 0) {
perror("read");
pthread_exit(NULL);
}
if (send(sockfd, sendmessage, readbytes, 0) < 0) {
perror("send");
pthread_exit(NULL);
}
}
}
}
void *data_thread(void *sockfd_ptr) {
int sockfd = *(int *) sockfd_ptr;
const int BUFSIZE = 5;
char recvmessage[BUFSIZE];
char *headerstr = NULL;
char *newheaderstr = NULL;
int recvbytes = 0;
int curheadlen = 0;
int totalheadlen = 0;
httpreq_t req;
int statcode = 200;
int done = 0;
int seen_header = 0;
char *header_end;
int content_length = 0;
char *qstr;
free(sockfd_ptr); // we have the int value out of this now
recvmessage[BUFSIZE - 1] = '\0'; // mark end of "string"
/* Read incoming client message from the socket */
while(!done && (recvbytes = recv(sockfd, recvmessage, BUFSIZE - 1, 0))) {
if (recvbytes < 0) {
perror("recv");
pthread_exit(NULL);
}
recvmessage[recvbytes] = '\0';
if (seen_header) {
// getting the entity body
content_length -= recvbytes;
if (content_length <= 0) done = 1;
} else {
newheaderstr = (char *) malloc((totalheadlen + recvbytes + 1) * sizeof(char));
newheaderstr[totalheadlen + recvbytes] = '\0';
memcpy(newheaderstr, headerstr, totalheadlen);
memcpy(newheaderstr + totalheadlen, recvmessage, recvbytes);
if (headerstr) free(headerstr);
headerstr = newheaderstr;
totalheadlen += recvbytes;
header_end = strstr(headerstr, "\r\n\r\n");
if (header_end) {
seen_header = 1;
header_end[2] = '\0';
if (parsereq(&req, headerstr) != 0) {
statcode = 400;
}
if (strcmp(req.method, "POST") == 0) {
// grab the body length
char *clenstr = get_header(&req, "Content-Length");
if (clenstr) {
content_length = atoi(clenstr) - ((headerstr + totalheadlen) - header_end - 4);
if (content_length <= 0) done = 1;
free(clenstr);
} else {
statcode = 400; // bad request -- no content length
done = 1;
}
} else {
// This isn't a POST, so there's no entity body
done = 1;
if (strcmp(req.method, "GET") != 0
&& strcmp(req.method, "HEAD") != 0) {
statcode = 501; // unknown request method
}
}
}
}
} // end of recv while loop
// used to deref a NULL pointer here... :(
if (headerstr != NULL) {
printf("%s\n", headerstr);
free(headerstr);
}
send_response(sockfd, &req, statcode);
close(sockfd);
return NULL;
}
int main(int argc, char *argv[]) {
int acc, sockfd, clen, port;
struct hostent *he;
struct sockaddr_in caddr, saddr;
if(argc <= 1) {
fprintf(stderr, "No port specified. Exiting!\n");
exit(1);
}
port = atoi(argv[1]);
/* Obtain name and address for the local host */
if((he=gethostbyname("localhost"))==NULL) {
herror("gethostbyname");
exit(1);
}
/* Open a TCP (Internet Stream) socket */
if((sockfd=socket(AF_INET,SOCK_STREAM,0)) == -1) {
perror("socket");
exit(1);
}
/* Create socket address structure for the local host */
memset((char *) &saddr, '\0', sizeof(saddr));
saddr.sin_family=AF_INET;
saddr.sin_port=htons(port);
saddr.sin_addr.s_addr=htonl(INADDR_ANY);
/* Bind our local address so that the client can send to us */
if(bind(sockfd,(struct sockaddr *) &saddr,sizeof(saddr)) == -1) {
perror("bind");
exit(1);
}
if(listen(sockfd,5) < 0) {
perror("listen");
exit(1);
}
/* Infinite loop for receiving and processing client requests */
for(;;) {
clen=sizeof(caddr);
/* Wait for a connection for a client process */
acc=accept(sockfd,(struct sockaddr *) &caddr,(socklen_t*)&clen);
if(acc < 0) {
perror("accept");
exit(1);
} else {
pthread_t *thread = (pthread_t *) malloc(sizeof(pthread_t));
int *sockfd_ptr = (int *) malloc(sizeof(int));
*sockfd_ptr = acc;
pthread_create(thread, NULL, data_thread, sockfd_ptr);
}
}
return 0;
}
I guess you could have a bound check before copying to the buffer?
For example, add
if(hdrend - hdrptr >= 1024)
exit(1)
before
memcpy((char *)hdrval, hdrptr, (hdrend - hdrptr));
The segfault happens at the point below.
Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0xb7ff4b70 (LWP 3902)]
Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0xb7ff4b70 (LWP 3902)]
0x08049507 in send_response (sockfd=6, req=0xb7ff4340, statcode=200)
at server/webserver.c:219
warning: Source file is more recent than executable.
219 if (req->uri == NULL || req->method == NULL ||
The memory address is
(gdb) p $_siginfo._sifields._sigfault.si_addr
$3 = (void *) 0x69cb120
The code that needs to be rewritten is
214 int urifd;
215 const int BUFSIZE = 1024;
216 char sendmessage[BUFSIZE];
217 char *path = req->uri;
218
219 if (req->uri == NULL || req->method == NULL ||
220 req->headers == NULL || req->version == NULL) {
221 return 0;

C strip html between <...>

How can i strip the HTML from document between and including the <...> tags in a HTML document using C? My current program uses curl to get the contents of the webpage and puts it into a text file, it then reads from the text file and removes the <>, but i am unsure of how to remove everything between those tags.
#include <curl/curl.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#define WEBPAGE_URL "http://homepages.paradise.net.nz/adrianfu/index.html"
#define DESTINATION_FILE "/home/user/data.txt"
size_t write_data( void *ptr, size_t size, size_t nmeb, void *stream)
{
return fwrite(ptr,size,nmeb,stream);
}
int main()
{
int in_tag = 0;
char * buffer;
char c;
long lSize;
size_t result;
FILE * file = fopen(DESTINATION_FILE,"w+");
if (file==NULL) {
fputs ("File error",stderr);
exit (1);
}
CURL *handle = curl_easy_init();
curl_easy_setopt(handle,CURLOPT_URL,WEBPAGE_URL); /*Using the http protocol*/
curl_easy_setopt(handle,CURLOPT_WRITEFUNCTION, write_data);
curl_easy_setopt(handle,CURLOPT_WRITEDATA, file);
curl_easy_perform(handle);
curl_easy_cleanup(handle);
int i, nRead, fd;
int source;
char buf[1024];
if((fd = open("data.txt", O_RDONLY)) == -1)
{
printf("Cannot open the file");
}
else
{
nRead = read(fd, buf, 1024);
printf("Original String ");
for(i=0; i<nRead; i++)
{
printf("%c", buf[i]);
}
printf("\nReplaced String ");
for(i=0; i<nRead; i++)
{
if(buf[i]=='<' || buf[i]=='>'){
buf[i]=' ';
}
printf("%c", buf[i]);
}
}
close(source);
return 0;
}
Placing just the code that removes the contents between the '<' and '>' tags (assuming that you deal with proper html, meaning that you don't have one tag nested in the declaration of the other like <html < body> >). I am just changing a small portion of your code. I will also remove the tags from the buf variable, instead of replacing the undesired characters with intervals, because I think this will be more useful to you (correct me if I am wrong).
int idx = 0;
int opened = 0; // false
for(i=0; i<nRead; i++)
{
if(buf[i]=='<') {
opened = 1; // true
} else if (buf[i] == '>') {
opened = 0; // false
} else if (!opened) {
buf[idx++] = buf[i];
}
}
buf[idx] = '\0';
printf("%s\n", buf);
This would also handle scripts and style tags
int stripHTMLTags(char *sToClean,size_t size)
{
int i=0,j=0,k=0;
int flag = 0; // 0: searching for < or & (& as in &bspn; etc), 1: searching for >, 2: searching for ; after &, 3: searching for </script>,</style>, -->
char tempbuf[1024*1024] = "";
char searchbuf[1024] = "";
while(i<size)
{
if(flag == 0)
{
if(sToClean[i] == '<')
{
flag = 1;
tempbuf[0] = '\0';
k=0; // track for <script>,<style>, <!-- --> etc
}
else if(sToClean[i] == '&')
{
flag = 2;
}
else
{
sToClean[j] = sToClean[i];
j++;
}
}
else if(flag == 1)
{
tempbuf[k] = sToClean[i];
k++;
tempbuf[k] = '\0';
//printf("DEBUG: %s\n",tempbuf);
if((0 == strcmp(tempbuf,"script")))
{
flag = 3;
strcpy(searchbuf,"</script>");
//printf("DEBUG: Detected %s\n",tempbuf);
tempbuf[0] = '\0';
k = 0;
}
else if((0 == strcmp(tempbuf,"style")))
{
flag = 3;
strcpy(searchbuf,"</style>");
//printf("DEBUG: Detected %s\n",tempbuf);
tempbuf[0] = '\0';
k = 0;
}
else if((0 == strcmp(tempbuf,"!--")))
{
flag = 3;
strcpy(searchbuf,"-->");
//printf("DEBUG: Detected %s\n",tempbuf);
tempbuf[0] = '\0';
k = 0;
}
if(sToClean[i] == '>')
{
sToClean[j] = ' ';
j++;
flag = 0;
}
}
else if(flag == 2)
{
if(sToClean[i] == ';')
{
sToClean[j] = ' ';
j++;
flag = 0;
}
}
else if(flag == 3)
{
tempbuf[k] = sToClean[i];
k++;
tempbuf[k] = '\0';
//printf("DEBUG: %s\n",tempbuf);
//printf("DEBUG: Searching for %s\n",searchbuf);
if(0 == strcmp(&tempbuf[0] + k - strlen(searchbuf),searchbuf))
{
flag = 0;
//printf("DEBUG: Detected END OF %s\n",searchbuf);
searchbuf[0] = '\0';
tempbuf[0] = '\0';
k = 0;
}
}
i++;
}
sToClean[j] = '\0';
return j;
}

Resources