I'm trying to use snprintf to write a request string into a buffer. The first iteration works fine; however, after the second iteration the beginning of the string disappears.
The function which writes into the buffer:
char* http_get_request(url_info *info) {
char * request_buffer = (char *) malloc(100 + strlen(info->path) + strlen(info->host)); //malloc spaces for the request buffer pointer
memset(request_buffer, 0, sizeof(*request_buffer));
puts("http get request req_buf address:");
printf("%p\n", request_buffer);
// puts(info->path);
// puts("INFO PATH ADDRESS:");
// printf("%p\n",&info->path);
snprintf(request_buffer, 1024, "GET /%s HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n",
info->path, info->host); //writes: "GET <PATH> HTTP/1.1\r\nHost: <HOSTNAME>\r\nConnection: close\r\n\r\n" into the buffer
return request_buffer;
}
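For reference, the allocation size, the memset size, and the snprintf bound above are three different values. A minimal sketch of the same function with one consistent size (assuming the same url_info fields; the name http_get_request_sized is just for illustration):
char* http_get_request_sized(url_info *info) {
    /* One size shared by malloc, memset and snprintf. */
    size_t size = 100 + strlen(info->path) + strlen(info->host);
    char *request_buffer = malloc(size);
    if (request_buffer == NULL)
        return NULL;
    memset(request_buffer, 0, size);   /* clears the whole buffer, not just 1 byte */
    snprintf(request_buffer, size,     /* bound matches the allocation */
             "GET /%s HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n",
             info->path, info->host);
    return request_buffer;
}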
Relevant part of the download_page function:
puts("REQUEST BUFFER");
printf("%p\n",request_buffer);
puts(request_buffer);
//retrieve request string
if (connect(mysocket, ptr-> ai_addr, ptr-> ai_addrlen)){
fprintf(stderr, "Could not connect: %s\n", strerror(errno));
return -1;
}
send(mysocket, request_buffer, strlen(request_buffer), 0);
//between receiving data from the server and storing in the buffer
//Some arbitrary big number for length of data buffer written into file
shutdown(mysocket, SHUT_WR); //further transmissions disallowed
free(request_buffer);
Relevant part which iterates and keeps generating new requests:
// Let's first isolate the first line of the reply
char *status_line = next_line(reply->reply_buffer, reply->reply_buffer_length);
if (status_line == NULL) {
fprintf(stderr, "Could not find status\n");
return 5;
}
*status_line = '\0'; // Make the first line a null-terminated string
// Now let's read the status (parsing the first line)
int status;
double http_version;
int rv = sscanf(reply->reply_buffer, "HTTP/%lf %d", &http_version, &status);
if (rv != 2) { // if there are not two values http_version and status then return error.
fprintf(stderr, "Could not parse http response first line (rv=%d, %s)\n", rv, reply->reply_buffer);
return 6;
}
int i = 0;
struct http_reply newReply;
url_info info;
while (status == 301 && i < 5){
char *buf = status_line + 2;
char *start = strstr(buf,"Location: ") +10;
char *end = strchr(start, '\r');
size_t diff = end - start +1;
char newUrl[diff];
strncpy(newUrl, start, diff);
printf("New Url:\n%s\n",newUrl);
printf("info pointer: %p\n", &info);
int ret = parse_url(newUrl, &info);
if (ret) {
fprintf(stderr, "Could not parse URL '%s': %s\n", newUrl, parse_url_errstr[ret]);
return 2;
}
ret = download_page(&info, &newReply);
if (ret) {
return 3;
}
char *status_line = next_line(newReply.reply_buffer, newReply.reply_buffer_length);
if (status_line == NULL) {
fprintf(stderr, "Could not find status\n");
return 5;
}
*status_line = '\0'; // Make the first line a null-terminated string
// Now let's read the status (parsing the first line)
int status;
double http_version;
int rv = sscanf(newReply.reply_buffer, "HTTP/%lf %d", &http_version, &status);
//write to the reply->reply_buffer a reply value.
//separate the first line (the HTTP status line), then the headers, then the data.
if (rv != 2) { // if there are not two values http_version and status then return error.
fprintf(stderr, "Could not parse http response first line (rv=%d, %s)\n", rv, newReply.reply_buffer);
return 6;
}
i++;
if (status == 301){
free(newReply.reply_buffer);
}
}
//if there was at least one redirect,
if (i!=0){
reply = &newReply;
}
if (status != 200) {
fprintf(stderr, "Server returned status %d (should be 200)\n", status);
return 0;
}
//check status redirect, send to the new location.
char *buf = status_line + 2;
char *ptr = reply->reply_buffer;
while(1 == 1){
status_line = next_line(ptr, reply->reply_buffer_length);
buf = status_line +2;
if (ptr+ 2 == buf) break;
ptr = buf;
}
write_data(file_name, buf, reply->reply_buffer + reply->reply_buffer_length - buf);
return 0;
}
This is the output:
Yis-MacBook-Pro:ex05-sockets yao$ ./wgetX http://redirect.epizeuxis.net/make
http get request req_buf address:
0x7fac3e6063c0
REQUEST BUFFER
0x7fac3e6063c0
GET /make HTTP/1.1
Host: redirect.epizeuxis.net
Connection: close
New Url:
http://info.cern.ch/
info pointer: 0x7ffee495a7e0
http get request req_buf address:
0x7fac3e6063c0
REQUEST BUFFER
0x7fac3e6063c0
HTTP/1.1
Host: info.cern.ch
Connection: close
New Url:
http://info.cern.ch/
info pointer: 0x7ffee495a7e0
http get request req_buf address:
0x7fac3e50fe70
REQUEST BUFFER
0x7fac3e50fe70
HTTP/1.1
Host: info.cern.ch
Connection: close
New Url:
http://info.cern.ch/
info pointer: 0x7ffee495a7e0
http get request req_buf address:
0x7fac3fa04080
REQUEST BUFFER
0x7fac3fa04080
HTTP/1.1
Host: info.cern.ch
Connection: close
New Url:
http://info.cern.ch/
info pointer: 0x7ffee495a7e0
http get request req_buf address:
0x7fac3e6063c0
REQUEST BUFFER
0x7fac3e6063c0
HTTP/1.1
Host: info.cern.ch
Connection: close
Related question:
Why does this code result in a 301 error when trying to access sites that have a .net suffix?
void test(const char * host, const char *index)
{
BIO *bio, *out;
SSL_CTX * ctx;
SSL * ssl;
int len;
char tmpbuf[1024];
ERR_load_crypto_strings();
char ready[1204];
char format[] = "%s:http";
sprintf(ready, format , host);
char req_template[] = "GET %s HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n";
char ready_request[1024];
sprintf(ready_request , req_template , index, host);
const SSL_METHOD * method = SSLv23_client_method();
if (!method)
exit(-1);
ctx = SSL_CTX_new(method);
if (!ctx)
exit(-1);
if (!SSL_CTX_load_verify_locations(ctx,"/etc/ssl/certs/ca-certificates.crt","/etc/ssl/certs/"))
{
SSL_CTX_free(ctx);
exit(-1);
}
bio = BIO_new_ssl_connect(ctx);
BIO_get_ssl(bio, &ssl);
SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
char temp[1024];
sprintf(temp, "%s:https",host);
if (BIO_set_conn_hostname(bio, temp) < 0)
{
memset(temp, 0, sizeof(temp));
sprintf(temp, "%s:http", host);
bio = BIO_new_connect(temp);
if (BIO_do_connect(bio) < 0 )
{
BIO_free_all(bio);
SSL_CTX_free(ctx);
exit(-1);
}
}
printf("###\n%s\n###\n",ready_request);
out = BIO_new_fp(stdout, BIO_NOCLOSE);
if(BIO_do_connect(bio) <= 0)
exit(-1);
BIO_puts(bio,ready_request);
for(;;)
{
len = BIO_read(bio, tmpbuf, 1024);
if(len <= 0) break;
BIO_write(out, tmpbuf, len);
}
BIO_free(bio);
BIO_free(out);
}
int main()
{
test("openssl.org", "/docs/manpages.html");
test("pastebin.com", "/raw/j0BnRwBw");
test("pastebin.com", "/j0BnRwBw");
}
For some reason that I can't figure out, the first time test is called it returns a 301 status code, but the other two times test is called it returns the HTML or the raw paste without any problems.
Does this have anything to do with the websites using different technologies or some kind of firewall? I believe Pastebin uses Cloudflare to protect itself. I also tried setting a User-Agent header but still got the same result.
Just add www. as a prefix in the Host header, and for BIO_set_conn_hostname use the format www.<hostname>.com:https (or www.<host>.org:http for BIO_new_connect).
For some reason, the docs do not mention this.
void test(const char * host, const char *index)
{
BIO *bio, *out;
SSL_CTX * ctx;
SSL * ssl;
int len;
char tmpbuf[1024];
ERR_load_crypto_strings();
char req_template[] = "GET %s HTTP/1.1\r\nHost: www.%s\r\nConnection: close\r\n\r\n";
char ready_request[1024];
sprintf(ready_request , req_template , index, host);
const SSL_METHOD * method = SSLv23_client_method();
if (!method)
exit(-1);
ctx = SSL_CTX_new(method);
if (!ctx)
exit(-1);
if (!SSL_CTX_load_verify_locations(ctx,"/etc/ssl/certs/ca-certificates.crt","/etc/ssl/certs/"))
{
SSL_CTX_free(ctx);
exit(-1);
}
bio = BIO_new_ssl_connect(ctx);
BIO_get_ssl(bio, &ssl);
SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
char temp[1024];
sprintf(temp, "www.%s:https",host);
if (BIO_set_conn_hostname(bio, temp) < 0)
{
memset(temp, 0, sizeof(temp));
sprintf(temp, "www.%s:http", host);
bio = BIO_new_connect(temp);
if (BIO_do_connect(bio) < 0 )
{
BIO_free_all(bio);
SSL_CTX_free(ctx);
exit(-1);
}
}
printf("###\n%s\n###\n",ready_request);
out = BIO_new_fp(stdout, BIO_NOCLOSE);
if(BIO_do_connect(bio) <= 0)
exit(-1);
BIO_puts(bio,ready_request);
for(;;)
{
len = BIO_read(bio, tmpbuf, 1024);
if(len <= 0) break;
BIO_write(out, tmpbuf, len);
}
BIO_free(bio);
BIO_free(out);
}
The first request https://openssl.org/docs/manpages.html returns:
HTTP/1.1 301 Moved Permanently
...
Location: https://www.openssl.org/docs/manpages.html
...
You then make another request to that URL. To demonstrate it working, I changed your first test case to read:
test("www.openssl.org", "/docs/manpages.html");
// ^^^^
and the server now returns the response you were expecting:
HTTP/1.1 200 OK
...
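In code, following the redirect comes down to pulling the Location value out of the 301 response headers and issuing a new request for that URL. A minimal sketch of the extraction step (the copy_location_url helper and its fixed-size output buffer are my own illustration, not part of the answer):
/* Copy the value of the Location header from a NUL-terminated block of
 * response headers into out. Returns 0 on success, -1 if not found. */
static int copy_location_url(const char *headers, char *out, size_t out_size) {
    const char *p = strstr(headers, "Location: ");
    if (p == NULL)
        return -1;
    p += strlen("Location: ");
    const char *end = strstr(p, "\r\n");      /* header lines end with CRLF */
    size_t len = end ? (size_t)(end - p) : strlen(p);
    if (len >= out_size)
        len = out_size - 1;
    memcpy(out, p, len);
    out[len] = '\0';                          /* always NUL-terminate */
    return 0;
}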
I am trying to download web pages over HTTPS by first fetching the headers with a HEAD request, parsing them to obtain the Content-Length, and then using the Content-Length plus some space for headers to allocate a buffer for the result of a GET request. It seems that stackoverflow.com gives a Content-Length that is too small, and so my code segfaults.
I've looked through past Stack Overflow questions to see how to dynamically allocate memory for pages that misreport their Content-Length, but haven't been able to find a suitable answer.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <openssl/bio.h>
#include <openssl/ssl.h>
#include <openssl/err.h>
#define MAX_HEADER_SIZE 8192
/**
* Main SSL demonstration code entry point
*/
int main() {
char* host_and_port = "stackoverflow.com:443";
char* head_request = "HEAD / HTTP/1.1\r\nHost: stackoverflow.com\r\n\r\n";
char* get_request = "GET / HTTP/1.1\r\nHost: stackoverflow.com\r\n\r\n";
char* store_path = "mycert.pem";
char *header_token, *line_token, content_length_line[1024];
char *cmp = "\r\n";
char *html;
char *get;
int content_length;
size_t i = 0;
char buffer[MAX_HEADER_SIZE];
buffer[0] = 0;
BIO* bio;
SSL_CTX* ctx = NULL;
SSL* ssl = NULL;
/* initilise the OpenSSL library */
SSL_load_error_strings();
SSL_library_init();
ERR_load_BIO_strings();
OpenSSL_add_all_algorithms();
bio = NULL;
int r = 0;
/* Set up the SSL pointers */
ctx = SSL_CTX_new(TLS_client_method());
ssl = NULL;
r = SSL_CTX_load_verify_locations(ctx, store_path, NULL);
if (r == 0) {
fprintf(stdout,"Unable to load the trust store from %s.\n", store_path);
fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
ERR_print_errors_fp(stdout);
}
/* Setting up the BIO SSL object */
bio = BIO_new_ssl_connect(ctx);
BIO_get_ssl(bio, &ssl);
if (!(ssl)) {
printf("Unable to allocate SSL pointer.\n");
fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
ERR_print_errors_fp(stdout);
bio = NULL;
}
SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
/* Attempt to connect */
BIO_set_conn_hostname(bio, host_and_port);
/* Verify the connection opened and perform the handshake */
if (BIO_do_connect(bio) < 1) {
fprintf(stdout, "Unable to connect BIO.%s\n", host_and_port);
fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
ERR_print_errors_fp(stdout);
bio = NULL;
}
if (SSL_get_verify_result(ssl) != X509_V_OK) {
printf("Unable to verify connection result.\n");
fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
ERR_print_errors_fp(stdout);
}
if (bio == NULL)
return (EXIT_FAILURE);
r = -1;
while (r < 0) {
r = BIO_write(bio, head_request, strlen(head_request));
if (r <= 0) {
if (!BIO_should_retry(bio)) {
printf("BIO_read should retry test failed.\n");
fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
ERR_print_errors_fp(stdout);
continue;
}
/* It would be prudent to check the reason for the retry and handle
* it appropriately here */
}
}
r = -1;
while (r < 0) {
r = BIO_read(bio, buffer, MAX_HEADER_SIZE);
if (r == 0) {
printf("Reached the end of the data stream.\n");
fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
ERR_print_errors_fp(stdout);
continue;
} else if (r < 0) {
if (!BIO_should_retry(bio)) {
printf("BIO_read should retry test failed.\n");
fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
ERR_print_errors_fp(stdout);
continue;
}
/* It would be prudent to check the reason for the retry and handle
* it appropriately here */
}
};
printf("%s\r\n", buffer);
header_token = strtok(buffer, cmp);
while (header_token != NULL)
{
//printf ("header_token: %s\n\n", header_token);
if (strncmp(header_token, "Content-Length:", strlen("Content-Length:")) == 0
|| strncmp(header_token, "content-length:", strlen("content-length:")) == 0)
{
//printf ("header_token %s is equal to Content-Length:\n", header_token);
strcpy(content_length_line, header_token);
}
header_token = strtok(NULL, cmp);
}
if (strlen(content_length_line) > 0)
{
line_token = strtok(content_length_line, " ");
line_token = strtok(NULL, " ");
content_length = atoi(line_token);
printf ("Content-Length = %d\n", content_length);
}
//char get[content_length + MAX_HEADER_SIZE];
get = malloc((content_length + MAX_HEADER_SIZE)*sizeof(char));
if (get == NULL) {
fprintf(stdout, "Out of memory\n");
return (EXIT_FAILURE);
}
r = -1;
while (r < 0) {
r = BIO_write(bio, get_request, strlen(get_request));
if (r <= 0) {
if (!BIO_should_retry(bio)) {
printf("BIO_read should retry test failed.\n");
fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
ERR_print_errors_fp(stdout);
continue;
}
/* It would be prudent to check the reason for the retry and handle
* it appropriately here */
}
}
r = -1;
while (r) {
while (r < 0) {
r = BIO_read(bio, buffer, 4096);
if (r == 0) {
printf("Reached the end of the data stream.\n");
fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
ERR_print_errors_fp(stdout);
continue;
} else if (r < 0) {
if (!BIO_should_retry(bio)) {
printf("BIO_read should retry test failed.\n");
fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
ERR_print_errors_fp(stdout);
continue;
}
/* It would be prudent to check the reason for the retry and handle
* it appropriately here */
}
};
printf("Received %d bytes\n",r);
printf("Received total of %ld bytes of %d\n", i+r, content_length);
memcpy(get+i, buffer, r);
i += r;
}
printf("%s\r\n", buffer);
/* clean up the SSL context resources for the encrypted link */
SSL_CTX_free(ctx);
free(get);
return (EXIT_SUCCESS);
}
I would usually expect to be able to print out the full web page but because of the erroneous Content-Length I get the following output and segfault.
Received 1752 bytes
Received total of 248784 bytes of 105585
Program received signal SIGSEGV, Segmentation fault.
__memmove_sse2_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:404
404 ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: No such file or directory.
How should I handle pages that give incorrect Content-Length?
The Content-Length in the response to a HEAD request is of no relevance. Only the Content-Length in the response that carries the actual body is relevant (i.e. the response to a GET, POST, ...). That Content-Length should be used to read the HTTP body: first read the HTTP header, determine the length, then read exactly that many body bytes. Even if more data can be read, it does not belong to the response body.
Apart from that, you are making an HTTP/1.1 request. This means the server may use Transfer-Encoding: chunked, in which case the value of Content-Length is irrelevant too: chunked encoding takes precedence, and you need to read the chunks of the body one by one, based on the length given for each chunk.
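As a rough illustration of that order of operations (find the end of the header, then size the body from Content-Length), here is a sketch built around BIO_read. It assumes an already-connected bio, ignores chunked encoding, and simply keeps growing the buffer until either the announced body length has arrived or the peer closes the connection:
#include <stdlib.h>
#include <string.h>
#include <openssl/bio.h>

/* Read a complete HTTP response into a growing buffer. Caller frees. */
static char *read_response(BIO *bio, size_t *out_len) {
    size_t cap = 8192, len = 0, body_start = 0;
    long content_length = -1;                    /* unknown until the header is parsed */
    char *buf = malloc(cap);
    if (buf == NULL)
        return NULL;
    for (;;) {
        if (len + 4096 + 1 > cap) {              /* grow instead of trusting a fixed size */
            char *tmp = realloc(buf, cap * 2);
            if (tmp == NULL) { free(buf); return NULL; }
            buf = tmp;
            cap *= 2;
        }
        int r = BIO_read(bio, buf + len, 4096);
        if (r <= 0)
            break;                               /* connection closed or error */
        len += (size_t)r;
        buf[len] = '\0';                         /* keep the buffer searchable as a string */
        if (body_start == 0) {                   /* still looking for the end of the header */
            char *hdr_end = strstr(buf, "\r\n\r\n");
            if (hdr_end != NULL) {
                body_start = (size_t)(hdr_end - buf) + 4;
                char *cl = strstr(buf, "Content-Length:");
                if (cl != NULL && cl < hdr_end)
                    content_length = strtol(cl + strlen("Content-Length:"), NULL, 10);
            }
        }
        if (content_length >= 0 && body_start > 0 &&
            len >= body_start + (size_t)content_length)
            break;                               /* full body received */
    }
    *out_len = len;
    return buf;
}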
I want to receive response messages from the server, so I wrote the function below:
char * receive_response(SSL *ssl, BIO *outbio) {
int bytes;
int received = 0;
char *resp;
resp = (char *) malloc(4096*sizeof(char));
bytes = SSL_read(ssl, resp, 4096);
resp[strlen(resp)] = '\0';
if (bytes < 0) {
BIO_printf(outbio, "\nError reading...\n");
exit(1);
}
received += bytes;
BIO_printf(outbio, "Received...%d bytes\n", received);
BIO_printf(outbio, "%s", resp);
BIO_printf(outbio, "Receive DONE\n");
return resp;
}
But I get the error malloc(): memory corruption when I run it.
The strange thing is that it occurs the second time I call this function in main; the first time it is fine. Please help me understand it.
Your string is not yet terminated with a '\0', so you can't call strlen on it:
char * receive_response(SSL *ssl, BIO *outbio) {
int bytes;
int received = 0;
char *resp;
// add one extra byte of room in case a full 4096 bytes are received
resp = malloc(4096+1);
if (NULL == resp)
{
perror("malloc");
exit(1);
}
bytes = SSL_read(ssl, resp, 4096);
if (bytes < 0) {
BIO_printf(outbio, "\nError reading...\n");
exit(1);
}
resp[bytes] = '\0';
received += bytes;
BIO_printf(outbio, "Received...%d bytes\n", received);
BIO_printf(outbio, "%s", resp);
BIO_printf(outbio, "Receive DONE\n");
return resp;
}
Another solution could be to call calloc instead of malloc...
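For completeness, the calloc variant mentioned above would look roughly like this; because calloc zero-fills the allocation, the extra byte at the end is already '\0' before anything is read:
// a minimal sketch of the calloc alternative, same 4096-byte read as above
resp = calloc(4096 + 1, sizeof(char));
if (NULL == resp)
{
    perror("calloc");
    exit(1);
}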
I'm writing a program to simulate a very basic web browser using HTTP GET on Linux. The only problem I seem to be having is that I'm unsure how to pull the Content-Length value out of the returned headers. Right now I have numbers hard-coded in, but they only really work for my file's size, not anything bigger or smaller. If I could parse the Content-Length, I could fix this issue. If someone could put me on the right path to parsing that, I would be very appreciative.
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
/* VSIE client program uses TCP protocol to connect to the remote http server.
The program will take 2 input arguments:
1) command option, get (receive) or head (send)
2) http URL address
*/
#define MAX 80
#define MAX2 1024
#define http "HTTP/1.1"
#define TRUE 1
#define FALSE 0
#define HEADERSTOP "\n\n"
main(int argc, char *argv[])
{
unsigned char *e;
char command[MAX];
char server[MAX];
char path[MAX];
char filename[MAX]= "";
char httpString[MAX];
int i, x, f, n, length = 0;
int numBytes = 0;
int getData = TRUE;
int getFlag = FALSE;
int flag = FALSE;
int headFlag = FALSE;
FILE *in;
int sk;
unsigned char buf[MAX2];
struct sockaddr_in remote;
struct hostent *hp;
struct servent *sp;
short port = 0;
// parse input arguments
sscanf(argv[2],"%[^'/']%s",server,path);
if (strcmp(argv[1],"-get") == 0)
{
sprintf(command, "GET");
getFlag = TRUE;
}
else if (strcmp(argv[1],"-head") == 0)
{
sprintf(command, "HEAD");
}
//build http 1.1 GET or HEAD message
sprintf(httpString,"%s %s %s\nHost: %s\n\n", command, path,http,server);
printf("command = %s, server = %s, path = %s\n", command, server, path);
printf("httpString = %s\n",httpString);
//parse filename from path
length = strlen(path);
x=0;
f=0;
for(i = 0; i < length; i++)
{
//printf("path[%d] = %c \n",i,path[i]);
if ((flag == TRUE) & (f == 2))
{
filename[x] = path[i];
x++;
}
if (path[i] == '/')
{
flag = TRUE;
f++;
}
}
printf("filename = %s\n", filename);
//if command = get, open filename
//if(command == "-get")
if (getFlag == TRUE)
{
if((in = fopen (filename,"w")) == NULL)
{
//printf("FAILURE: opening input file %s\n",filename);
perror("FAILURE: opening input file");
exit(1);
}
printf("file opened successfully\n");
}
//get internet address of host & port number of http service
hp = gethostbyname(server);
if (hp == NULL)
{
printf("Can't find host name. %s\n", server);
exit (1);
}
//copy the h_addr (source) to s_add (destination) for n bytes specified by length
bcopy(hp->h_addr,&remote.sin_addr.s_addr,hp->h_length);
/* get the port number */
sp = getservbyname("http", "tcp");
if (sp == NULL)
{
printf("can't find port # %d\n",sp->s_port);
exit (1);
}
port = sp->s_port;
remote.sin_port = sp->s_port;
printf("port = %d, port = %d \n", port, remote.sin_port);
//create socket for http server - socket type: Sock_Stream, protocol: TCP
sk = socket(AF_INET,SOCK_STREAM,0);
if (sk < 0)
{
perror("error opening socket");
exit(1);
}
remote.sin_family = AF_INET;
//initiate connection to the server address w/ TCP socket
if (connect(sk, (struct sockaddr *) &remote, sizeof(remote)) < 0)
{
printf("connect fails!\n");
exit(1);
}
printf("connection successful\n");
//send http message
printf("send message:%s\n", httpString);
//send(sk,httpString,strlen(httpString)+1,0);
if(send(sk,httpString,sizeof(httpString),0) < 0)
{
printf("send() failed");
//exit(1);
}
n = 1;
//Loop until all data received
while(getData == TRUE)
{
//wait for and print the return message
numBytes = recv(sk,buf,sizeof(buf),0);
if (numBytes < 0)
{
perror("error reading from socket");
break;
}
else if (numBytes < MAX2)
{
getData = FALSE;
printf("***end while loop****\n");
}
if (headFlag == FALSE){
e = memchr(buf, '\n', sizeof(buf));
while (*(e+1) != '\r'){
e = memchr(e+1, '\n', sizeof(buf));
}
headFlag = TRUE;
}
printf("\n****number of bytes received %d****\n",numBytes);
//saved the retrieved content into the file (input argument)
if (getFlag == TRUE)
{
//printf("write output\n");
printf("%.*s\n", (numBytes-763), buf);
if(e != NULL){
fwrite(e, numBytes, 1, in);
e = NULL;
}else{
fwrite(buf, numBytes, 1, in);
}
}
n++;
} // end while()
//close socket & file
close(sk);
if(fclose(in) !=0)
{
perror("FAILURE: Closing input file");
exit(1);
}
return 0;
} //end main()
The returned information is:
****number of bytes received 1024****
HTTP/1.1 200 OK
Date: Tue, 08 Apr 2014 17:37:10 GMT
Server: Apache/2.2.22 (Ubuntu)
Last-Modified: Tue, 18 Feb 2014 19:41:06 GMT
ETag: "1724117-9fb-4f2b373fef880"
Accept-Ranges: bytes
Content-Length: 2555
Vary: Accept-Encoding
Content-Type: text/html
Here is some code that shows you one way to do it. The bit that matters for you is
strtok - split the string into lines
strstr - find a string that contains the words you are looking for
sscanf - scan the line for the value of the integer
Everything else is just there to make the example work.
#include <stdio.h>
#include <string.h>
int main(void) {
char httpString[]="HTTP/1.1 200 OK\n"\
"Date: Tue, 08 Apr 2014 17:37:10 GMT\n"\
"Server: Apache/2.2.22 (Ubuntu)\n"\
"Last-Modified: Tue, 18 Feb 2014 19:41:06 GMT\n"\
"ETag: \"1724117-9fb-4f2b373fef880\"\n"\
"Accept-Ranges: bytes\n"\
"Content-Length: 2555\n"\
"Vary: Accept-Encoding\n"\
"Content-Type: text/html\n";
printf("string is %s\n", httpString);
char *line;
line = strtok(httpString, "\n");
while(line != NULL) {
if (strstr(line, "Content-Length:")!= NULL) {
int theNumber;
sscanf(line, "Content-Length: %d", &theNumber);
printf("The number is %d\n", theNumber);
}
line = strtok(NULL, "\n");
}
return 0;
}
Output:
string is HTTP/1.1 200 OK
Date: Tue, 08 Apr 2014 17:37:10 GMT
Server: Apache/2.2.22 (Ubuntu)
Last-Modified: Tue, 18 Feb 2014 19:41:06 GMT
ETag: "1724117-9fb-4f2b373fef880"
Accept-Ranges: bytes
Content-Length: 2555
Vary: Accept-Encoding
Content-Type: text/html
The number is 2555
Alternatively, as mentioned by @enhzflep in the comments, if you find the position of the string Content-Length: in the original, you can do an sscanf on the text that starts right after that point:
char searchString[] = "Content-Length:";
char *offset;
int number;
offset = strstr(httpString, searchString);
if (offset != NULL)
    sscanf(offset + strlen(searchString), "%d", &number);
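One caveat with both approaches: strstr and strtok are case-sensitive, while HTTP header names are not, so a response that sends content-length: would be missed. On glibc (with _GNU_SOURCE) and on the BSDs, strcasestr offers a case-insensitive search; a small sketch under that assumption:
#define _GNU_SOURCE            /* strcasestr is a GNU extension on glibc */
#include <stdio.h>
#include <string.h>

/* Returns the Content-Length value, or -1 if the header is not present. */
static int parse_content_length(const char *response) {
    const char *cl = strcasestr(response, "Content-Length:");
    int number;
    if (cl == NULL || sscanf(cl + strlen("Content-Length:"), "%d", &number) != 1)
        return -1;
    return number;
}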
We are trying to write a multi-threaded web server, but we don't know how to get the file name out of each HTTP request that arrives at a simple server. We are also concerned about the size of each of these files. Any ideas?
Here is our main for the server.c file:
int main(int argc, char *argv[])
{
int listenfd, connfd, port, clientlen;
struct sockaddr_in clientaddr;
getargs(&port, argc, argv);
listenfd = Open_listenfd(port);
thread_pool_init();
for(;;){
pthread_mutex_lock(&pool_lock);
while(buf_count == request_limit)
pthread_cond_wait(&signal_worker, &pool_lock);
clientlen = sizeof(clientaddr);
connfd = Accept(listenfd, (SA *)&clientaddr, (socklen_t *) &clientlen);
//get/parse html file name here
//get file size using stat
put(connfd);
pthread_cond_signal(&signal_worker);
pthread_mutex_unlock(&pool_lock);
}
Our open_connection code in client.c, which sends HTTP requests to the server, looks like this:
void * open_connection( ){
clientfd = Open_clientfd(host, port);
clientSend(clientfd, filename);
pthread_mutex_lock(&lock);
clientPrint(clientfd);
pthread_mutex_unlock(&lock);
Close(clientfd);
sem_post(&cond);
return NULL;
}
//Send an HTTP request for the specified file
void clientSend(int fd, char *filename)
{
char buf[MAXLINE];
char hostname[MAXLINE];
Gethostname(hostname, MAXLINE);
//Form and send the HTTP request
sprintf(buf, "GET %s HTTP/1.1\n", filename);
sprintf(buf, "%shost: %s\n\r\n", buf, hostname);
Rio_writen(fd, buf, strlen(buf));
}
Once you receive the HTTP request, you need to parse it to retrieve the name of the requested file and then send that file back to the client.
Below is some simple code for handling an HTTP request which I have used in one of my experiments. It is really simple: it does not take into account many characteristics of the HTTP protocol and basically only works with GET requests, but it may be a good starting point.
recv_request is a function that reads the request from the socket used to communicate with the client.
#define PORT 80
#define WEBROOT "/var/www/localhost/htdocs/"
void handle_connection(int sockfd, struct sockaddr_in *client_addr_ptr) {
unsigned char *ptr, request[REQUEST], resource[REQUEST];
int fd, length;
memset(request, 0, REQUEST);
memset(resource, 0, REQUEST);
length = recv_request(sockfd, request);
printf("Got request from %s:%d length: %d \n", inet_ntoa(client_addr_ptr->sin_addr), ntohs(client_addr_ptr->sin_port),length);
puts("--------------------------------\n");
printf("%.*s", 500, request);
puts("--------------------------------");
ptr = strstr(request, " HTTP/"); // search for valid looking request
if(ptr == NULL) { // then this isn't valid HTTP
printf(" NOT HTTP!\n");
} else {
*ptr = 0; // terminate the buffer at the end of the URL
ptr = NULL; // set ptr to NULL (used to flag for an invalid request)
if(strncmp(request, "GET ", 4) == 0) // get request
ptr = request+4; // ptr is the URL
if(strncmp(request, "HEAD ", 5) == 0) // head request
ptr = request+5; // ptr is the URL
if(ptr == NULL) { // then this is not a recognized request
printf("\tUNKNOWN REQUEST!\n");
} else { // valid request, with ptr pointing to the resource name
if (ptr[strlen(ptr) - 1] == '/') // for resources ending with '/'
strcat(ptr, "index.html"); // add 'index.html' to the end
strcpy(resource, WEBROOT); // begin resource with web root path
strcat(resource, ptr); // and join it with resource path
fd = open(resource, O_RDONLY, 0); // try to open the file
printf("Opening \'%s\'\t", resource);
if(fd == -1) { // if file is not found
printf(" 404 Not Found\n");
send_string(sockfd, "HTTP/1.0 404 NOT FOUND\r\n");
send_string(sockfd, "Server: Tiny webserver\r\n\r\n");
send_string(sockfd, "<html><head><title>404 Not Found</title></head>");
send_string(sockfd, "<body><h1>URL not found</h1></body></html>\r\n");
} else { // otherwise, serve up the file
printf(" 200 OK\n\n");
send_string(sockfd, "HTTP/1.0 200 OK\r\n");
send_string(sockfd, "Server: Tiny webserver\r\n\r\n");
if(ptr == request + 4) { // then this is a GET request
if( (length = get_file_size(fd)) == -1)
fatal("getting resource file size");
if( (ptr = (unsigned char *) malloc(length)) == NULL)
fatal("allocating memory for reading resource");
read(fd, ptr, length); // read the file into memory
write(sockfd, ptr, length); // send it to socket
free(ptr); // free file memory
}
close(fd); // close the file
} // end if block for file found/not found
} // end if block for valid request
} // end if block for valid HTTP
shutdown(sockfd, SHUT_RDWR); // close the socket gracefully
return;
}
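The helpers used above (recv_request, send_string, get_file_size, fatal) are not shown. As one example, get_file_size could plausibly be implemented with fstat; a sketch under that assumption:
#include <sys/stat.h>

/* Return the size of an open file in bytes, or -1 on error. */
int get_file_size(int fd) {
    struct stat st;
    if (fstat(fd, &st) == -1)
        return -1;
    return (int)st.st_size;
}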
You should also take a look at the curl library.
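For comparison, a minimal fetch with libcurl (the URL is just a placeholder; by default the response body is written to stdout) looks roughly like this:
#include <stdio.h>
#include <curl/curl.h>

int main(void) {
    curl_global_init(CURL_GLOBAL_DEFAULT);
    CURL *curl = curl_easy_init();
    if (curl) {
        curl_easy_setopt(curl, CURLOPT_URL, "http://example.com/");
        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);   /* follow 301/302 redirects */
        CURLcode res = curl_easy_perform(curl);               /* body goes to stdout */
        if (res != CURLE_OK)
            fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
        curl_easy_cleanup(curl);
    }
    curl_global_cleanup();
    return 0;
}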