I'm a total noob when it comes to C, but I found this curl example at http://curl.haxx.se/libcurl/c/getinmemory.html:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>
struct MemoryStruct {
char *memory;
size_t size;
};
static size_t
WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
{
size_t realsize = size * nmemb;
struct MemoryStruct *mem = (struct MemoryStruct *)userp;
char *ptr = realloc(mem->memory, mem->size + realsize + 1);
if (ptr == NULL) {
/* out of memory! */
printf("not enough memory (realloc returned NULL)\n");
exit(EXIT_FAILURE);
}
mem->memory = ptr;
memcpy(&(mem->memory[mem->size]), contents, realsize);
mem->size += realsize;
mem->memory[mem->size] = 0;
return realsize;
}
int main(void)
{
CURL *curl_handle;
struct MemoryStruct chunk;
chunk.memory = malloc(1); /* will be grown as needed by the realloc above */
chunk.size = 0; /* no data at this point */
curl_global_init(CURL_GLOBAL_ALL);
/* init the curl session */
curl_handle = curl_easy_init();
/* specify URL to get */
curl_easy_setopt(curl_handle, CURLOPT_URL, "http://www.example.com/");
/* send all data to this function */
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
/* we pass our 'chunk' struct to the callback function */
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
/* some servers don't like requests that are made without a user-agent
field, so we provide one */
curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
/* get it! */
curl_easy_perform(curl_handle);
/* cleanup curl stuff */
curl_easy_cleanup(curl_handle);
/*
* Now, our chunk.memory points to a memory block that is chunk.size
* bytes big and contains the remote file.
*
* Do something nice with it!
*
* You should be aware of the fact that at this point we might have an
* allocated data block, and nothing has yet deallocated that data. So when
* you're done with it, you should free() it as a nice application.
*/
printf("%lu bytes retrieved\n", (long)chunk.size);
if(chunk.memory)
free(chunk.memory);
/* we're done with libcurl, so clean it up */
curl_global_cleanup();
return 0;
}
It works perfectly for me, but is it possible to tweak it a bit so that it prints the page contents instead of just their size?
Thanks in advance!
Have you tried:
printf("%lu bytes retrieved\n", (long)chunk.size);
printf("%s", chunk.memory);
You might be surprised.
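That works because the callback keeps the buffer NUL-terminated (mem->memory[mem->size] = 0;), so chunk.memory is a valid C string. One caveat: printf("%s", ...) stops at the first embedded NUL byte, so for binary content a length-aware write is safer:
/* print exactly chunk.size bytes, embedded NUL bytes included */
fwrite(chunk.memory, 1, chunk.size, stdout);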
I would like to create a C client that makes asynchronous API calls with libcurl and saves the responses; about a hundred calls run at the same time. I have been looking for tutorials and examples of curl_multi_* and curl_multi_socket with epoll for four days (I use Linux), but they seem not to exist, and the few examples out there are not understandable to a beginner like me. Apparently I'm the only one interested in doing such a thing in C.
I also looked at the official documentation examples, but they use at most 2 connections at the same time, declaring two variables and calling curl_easy_init() on each. The problem is that the number of requests my program makes is not known in advance, so I cannot declare the variables a priori (and declaring 100 variables is not practical anyway).
I did find an example of curl_multi_socket with epoll, but it is difficult to understand and replicate for my case without an explanation of how it works.
Is there anyone who can give me a code example of how to use curl_multi_* for multiple simultaneous connections, just to get started? It would be much appreciated.
EDIT:
After hours of research, I finally found an example that might fit. The problem is that it crashes often, and for various reasons:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <curl/curl.h>
#define NUM_URLS 64
#define MAX_PARALLEL 8 /* assumed; value not shown */
typedef struct data { // 24 / 24 Bytes
struct curl_slist * header;
char ** sub_match_json;
int nbr_sub_match;
int response_counter;
} data_t;
// list of the same URL repeated multiple times
// assume there are 64 url for example
static char *urls[] = {};
void make_header(data_t * data) {
//many curl_slist_append();
}
void init_data(data_t *data) {
data->sub_match_json = (char **)malloc(sizeof(char *) * NUM_URLS);
data->response_counter = 0;
data->nbr_sub_match = NUM_URLS;
make_header(data);
}
static size_t write_cb(void *response, size_t size, size_t nmemb, void *userp)
{
size_t realsize = size * nmemb;
data_t * data = (data_t *) userp;
data->sub_match_json[data->response_counter] = malloc(realsize + 1);
if(data->sub_match_json[data->response_counter] == NULL)
{
fprintf(stderr, "Memory allocation failed: %s\n", strerror(errno));
return 0; /* out of memory! */
}
memcpy(data->sub_match_json[data->response_counter], response, realsize);
data->sub_match_json[data->response_counter][realsize] = 0;
data->response_counter++;
return realsize;
}
static void add_transfer(CURLM *cm, int i, data_t *data)
{
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, 1<<23);
// curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
// curl_easy_setopt(curl, CURLOPT_TCP_FASTOPEN, 1L);
// curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1L);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)data);
curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, data->header);
curl_easy_setopt(curl, CURLOPT_HEADER, 1L);
curl_easy_setopt(curl, CURLOPT_URL, urls[i]);
curl_easy_setopt(curl, CURLOPT_PRIVATE, urls[i]);
curl_multi_add_handle(cm, curl);
}
int main(void)
{
CURLM *cm;
CURLMsg *msg;
data_t global_data;
unsigned int transfers = 0;
int msgs_left = -1;
int still_alive = 1;
curl_global_init(CURL_GLOBAL_ALL);
cm = curl_multi_init();
init_data(&global_data); // my function
/* Limit the amount of simultaneous connections curl should allow: */
curl_multi_setopt(cm, CURLMOPT_MAXCONNECTS, (long)MAX_PARALLEL);
for(transfers = 0; transfers < MAX_PARALLEL; transfers++)
add_transfer(cm, transfers, &global_data);
do {
curl_multi_perform(cm, &still_alive);
while((msg = curl_multi_info_read(cm, &msgs_left))) {
if(msg->msg == CURLMSG_DONE) {
char *url;
CURL *e = msg->easy_handle;
curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, &url);
fprintf(stderr, "R: %d - %s <%s>\n",
msg->data.result, curl_easy_strerror(msg->data.result), url);
curl_multi_remove_handle(cm, e);
curl_easy_cleanup(e);
}
else {
fprintf(stderr, "E: CURLMsg (%d)\n", msg->msg);
}
if(transfers < global_data.nbr_sub_match)
add_transfer(cm, transfers++, &global_data);
}
if(still_alive)
curl_multi_wait(cm, NULL, 0, 1000, NULL);
} while(still_alive || (transfers < NUM_URLS));
curl_multi_cleanup(cm);
curl_global_cleanup();
while (global_data.response_counter-- > 0) {
printf("%s\n", global_data.sub_match_json[global_data.response_counter]);
}
return EXIT_SUCCESS;
}
Error:
api_calls(75984,0x100088580) malloc: Incorrect checksum for freed object 0x100604c30: probably modified after being freed.
Corrupt value: 0x600002931f10
api_calls(75984,0x100088580) malloc: *** set a breakpoint in malloc_error_break to debug
This happens on curl_easy_cleanup(e);
Exception has occurred.
EXC_BAD_ACCESS (code=1, address=0x0)
Otherwise, when no error occurs, sub_match_json ends up containing raw bytes rather than readable characters. Why is this?
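For what it's worth, the crash is consistent with how write_cb fills data_t above: libcurl may invoke the write callback many times per transfer (once per chunk received), and with curl_multi the chunks of different transfers arrive interleaved. Each invocation claims a fresh sub_match_json slot and bumps the shared response_counter, so one response gets scattered across several slots and the counter soon runs past the NUM_URLS pointers that were allocated, corrupting the heap (with CURLOPT_HEADER set, response headers are also written into the same buffers). The usual pattern is one accumulating buffer per transfer, passed through CURLOPT_WRITEDATA; a minimal sketch, where response_t and write_cb2 are illustrative names:
/* One growing buffer per transfer. */
typedef struct {
char *buf;
size_t len;
} response_t;
static size_t write_cb2(void *contents, size_t size, size_t nmemb, void *userp)
{
size_t realsize = size * nmemb;
response_t *r = (response_t *) userp;
char *p = realloc(r->buf, r->len + realsize + 1);
if (p == NULL)
return 0; /* makes libcurl abort this transfer */
r->buf = p;
memcpy(r->buf + r->len, contents, realsize);
r->len += realsize;
r->buf[r->len] = 0; /* keep the buffer printable */
return realsize;
}
/* In add_transfer(): one response_t per easy handle. Storing it via
CURLOPT_PRIVATE (instead of the URL) lets the DONE branch retrieve
and free it after printing. */
response_t *r = calloc(1, sizeof(*r));
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) r);
curl_easy_setopt(curl, CURLOPT_PRIVATE, (void *) r);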
I wanted to check whether curl has an alternative to InternetReadFile, which returns content limited to the size of the buffer you supply.
I have used:
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, Read_Cb);
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &ReadBuffer);
curl_easy_perform(curl_handle);
But my Read_Cb gets called back multiple times (which is documented behaviour), and that is fine.
I want curl_easy_perform to return when my buffer size is reached. I explored CURLOPT_BUFFERSIZE, but that doesn't seem to help here.
CURLE_WRITE_ERROR is a problem because it aborts the transfer. I would rather return something from my callback that gracefully tells curl to make curl_easy_perform return.
Does CURLOPT_RANGE help?
CURL *curl = curl_easy_init();
if (curl)
{
size_t from = 0, to = 1023; /* byte ranges are inclusive */
char range[64];
snprintf(range, sizeof range, "%zu-%zu", from, to);
curl_easy_setopt(curl, CURLOPT_URL, "http://example.com");
curl_easy_setopt(curl, CURLOPT_RANGE, range);
curl_easy_perform(curl);
curl_easy_cleanup(curl);
}
If the server supports range requests, use HTTP Range:
CURL* curl = curl_easy_init();
if (curl) {
char range[32];
const size_t size = 65536;
snprintf(range, sizeof(range), "0-%zu", size - 1);
curl_easy_setopt(curl, CURLOPT_URL, "http://httpbin.org/range/65536");
curl_easy_setopt(curl, CURLOPT_RANGE, range);
curl_easy_perform(curl);
curl_easy_cleanup(curl);
}
See CURLOPT_RANGE for more details.
If the server does not support range requests, use a progress callback or a write callback to terminate the request once the size is reached.
const size_t max_size = 65535;
struct memory {
char* response;
size_t size;
};
static size_t cb(void* data, size_t size, size_t nmemb, void* userp) {
size_t realsize = size * nmemb;
struct memory* mem = (struct memory*)userp;
if (mem->size + realsize > max_size)
realsize = max_size - mem->size;
char* ptr = realloc(mem->response, mem->size + realsize);
if (ptr == NULL)
return 0; /* out of memory! */
mem->response = ptr;
memcpy(&(mem->response[mem->size]), data, realsize);
mem->size += realsize;
/* if realsize < size * nmemb, this will cause the transfer to get
aborted and curl_easy_perform will return CURLE_WRITE_ERROR */
return realsize;
}
struct memory chunk = {0};
CURL* curl = curl_easy_init();
if (curl) {
curl_easy_setopt(curl, CURLOPT_URL, "http://httpbin.org/stream-bytes/131072");
/* send all data to this function */
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, cb);
/* we pass our 'chunk' struct to the callback function */
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void*)&chunk);
curl_easy_perform(curl);
curl_easy_cleanup(curl);
}
If CURLE_WRITE_ERROR is not desired, you can use curl_multi_perform(multi, &running_handles) and remove the handle from the multi handle once the size is reached. See curl_multi_remove_handle for more details.
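A minimal sketch of that multi-based approach, reusing struct memory from above; cb_keep_all stands for an assumed write callback like cb, minus the truncation, so it always appends the whole chunk and returns size * nmemb:
struct memory chunk = {0};
CURL *easy = curl_easy_init();
CURLM *multi = curl_multi_init();
int running = 1;
curl_easy_setopt(easy, CURLOPT_URL, "http://httpbin.org/stream-bytes/131072");
curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, cb_keep_all);
curl_easy_setopt(easy, CURLOPT_WRITEDATA, (void *)&chunk);
curl_multi_add_handle(multi, easy);
while (running) {
curl_multi_perform(multi, &running);
if (chunk.size >= max_size)
break; /* got enough; stop without an error code */
if (running)
curl_multi_wait(multi, NULL, 0, 1000, NULL);
}
curl_multi_remove_handle(multi, easy); /* graceful stop, no CURLE_WRITE_ERROR */
curl_easy_cleanup(easy);
curl_multi_cleanup(multi);
free(chunk.response);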
I am trying to develop a C application that uploads a file to an HTTP endpoint.
I used the command below from a terminal to upload a file, which works fine:
curl -F file=@/filename.txt -O http://serveraddress/download
Since I need this in a C application, I took https://curl.haxx.se/libcurl/c/postit2-formadd.html as a reference, which also works fine; the file supplied gets uploaded successfully.
I want to upload the file the same way, but the file is not present on local storage. It is being downloaded by the application using curl, and we know the size of the file from the beginning. I want to stream that file directly to the HTTP endpoint as above (multipart file upload), because I don't have enough local storage to download the entire file and then upload it.
File--> CurlDownloadHandle-->Write() ----- Read()-->CurlUploadHandle---File
I am not sure how to use CURLOPT_HTTPPOST, CURLOPT_POSTFIELDS and the rest. Is there any way I can achieve this streaming?
Any suggestion is much appreciated.
Thanks.
I have figured out the method below to achieve my requirement.
File streaming can be achieved using the curl_formadd API (for older versions of libcurl) or the curl_mime API (for newer versions, > 7.58.0).
Using Mime APIs:
CURL *curl_handle = curl_easy_init();
curl_mime *mime;
curl_mimepart *part;
mime = curl_mime_init(curl_handle);
part = curl_mime_addpart(mime);
curl_mime_name(part, "sendfile");
curl_mime_filename(part, "uploadfile.txt");
curl_mime_data_cb(part, file_size, read_buffer, NULL, NULL, NULL);
curl_easy_setopt(curl_handle, CURLOPT_URL, url);
curl_easy_setopt(curl_handle, CURLOPT_MIMEPOST, mime);
curl_easy_perform(curl_handle);
curl_mime_free(mime);
curl_easy_cleanup(curl_handle);
The read callback passed to curl_mime_data_cb() reads from a buffer/queue; that buffer/queue is filled by the other part of the application, which downloads the file using libcurl.
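read_buffer itself is not shown above; a minimal sketch of what such a callback could look like, where byte_queue and byte_queue_pop are hypothetical placeholders for whatever shared structure the downloading side fills:
/* Hypothetical shared queue, filled by the downloading transfer. */
struct byte_queue;
/* Assumed helper: copies up to "want" bytes into "dst" (blocking until
data is available) and returns the count, or 0 at end of stream. */
size_t byte_queue_pop(struct byte_queue *q, char *dst, size_t want);
static size_t read_buffer(char *buffer, size_t size, size_t nitems, void *arg)
{
struct byte_queue *q = (struct byte_queue *) arg;
return byte_queue_pop(q, buffer, size * nitems);
}
The queue pointer then goes in as the callback argument: curl_mime_data_cb(part, file_size, read_buffer, NULL, NULL, queue).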
I hope it helps someone.
I recommend you try it with xmlstream (libcurl's streaming XML example, which uses the expat parser); please check out the code below, maybe it will help.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <expat.h>
#include <curl/curl.h>
struct MemoryStruct {
char *memory;
size_t size;
};
struct ParserStruct {
int ok;
size_t tags;
size_t depth;
struct MemoryStruct characters;
};
static void startElement(void *userData, const XML_Char *name,
const XML_Char **atts)
{
struct ParserStruct *state = (struct ParserStruct *) userData;
state->tags++;
state->depth++;
/* Get a clean slate for reading in character data. */
free(state->characters.memory);
state->characters.memory = NULL;
state->characters.size = 0;
}
static void characterDataHandler(void *userData, const XML_Char *s, int len)
{
struct ParserStruct *state = (struct ParserStruct *) userData;
struct MemoryStruct *mem = &state->characters;
char *ptr = realloc(mem->memory, mem->size + len + 1);
if(!ptr) {
/* Out of memory. */
fprintf(stderr, "Not enough memory (realloc returned NULL).\n");
state->ok = 0;
return;
}
mem->memory = ptr;
memcpy(&(mem->memory[mem->size]), s, len);
mem->size += len;
mem->memory[mem->size] = 0;
}
static void endElement(void *userData, const XML_Char *name)
{
struct ParserStruct *state = (struct ParserStruct *) userData;
state->depth--;
printf("%5lu %10lu %s\n", state->depth, state->characters.size, name);
}
static size_t parseStreamCallback(void *contents, size_t length, size_t nmemb,
void *userp)
{
XML_Parser parser = (XML_Parser) userp;
size_t real_size = length * nmemb;
struct ParserStruct *state = (struct ParserStruct *) XML_GetUserData(parser);
/* Only parse if we're not already in a failure state. */
if(state->ok && XML_Parse(parser, contents, real_size, 0) == 0) {
int error_code = XML_GetErrorCode(parser);
fprintf(stderr, "Parsing response buffer of length %lu failed"
" with error code %d (%s).\n",
real_size, error_code, XML_ErrorString(error_code));
state->ok = 0;
}
return real_size;
}
int main(void)
{
CURL *curl_handle;
CURLcode res;
XML_Parser parser;
struct ParserStruct state;
/* Initialize the state structure for parsing. */
memset(&state, 0, sizeof(struct ParserStruct));
state.ok = 1;
/* Initialize a namespace-aware parser. */
parser = XML_ParserCreateNS(NULL, '\0');
XML_SetUserData(parser, &state);
XML_SetElementHandler(parser, startElement, endElement);
XML_SetCharacterDataHandler(parser, characterDataHandler);
/* Initialize a libcurl handle. */
curl_global_init(CURL_GLOBAL_DEFAULT);
curl_handle = curl_easy_init();
curl_easy_setopt(curl_handle, CURLOPT_URL,
"https://www.w3schools.com/xml/simple.xml");
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, parseStreamCallback);
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)parser);
printf("Depth Characters Closing Tag\n");
/* Perform the request and any follow-up parsing. */
res = curl_easy_perform(curl_handle);
if(res != CURLE_OK) {
fprintf(stderr, "curl_easy_perform() failed: %s\n",
curl_easy_strerror(res));
}
else if(state.ok) {
/* Expat requires one final call to finalize parsing. */
if(XML_Parse(parser, NULL, 0, 1) == 0) {
int error_code = XML_GetErrorCode(parser);
fprintf(stderr, "Finalizing parsing failed with error code %d (%s).\n",
error_code, XML_ErrorString(error_code));
}
else {
printf(" --------------\n");
printf(" %lu tags total\n", state.tags);
}
}
/* Clean up. */
free(state.characters.memory);
XML_ParserFree(parser);
curl_easy_cleanup(curl_handle);
curl_global_cleanup();
return 0;
}
I use libcurl in my C code to download files given their urls. My code looks similar to this:
#include <stdio.h>
#include <curl/curl.h>
#include <pthread.h>
static size_t write_data(void *ptr, size_t size, size_t nmemb, void *stream)
{
size_t written = fwrite(ptr, size, nmemb, (FILE *)stream);
return written;
}
int progress_func(void *ptr, double TotalToDownload, double NowDownloaded,
double TotalToUpload, double NowUploaded)
{
struct my_custom_struct *my_dummy_data = (struct my_custom_struct *) ptr;
//do some stuffs here
return 0;
}
void *download_with_curl(void *data)
{
char *url = (char *) data;
int res = 0;
// My custom struct to store data
struct my_custom_struct my_dummy_data;
char errbuff[CURL_ERROR_SIZE] = {0};
CURL *curl_handle;
/* init the curl session */
curl_handle = curl_easy_init();
/* set URL to get here */
curl_easy_setopt(curl_handle, CURLOPT_URL, url);
/* disable progress meter, set to 0L to enable*/
curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0L);
/* send all data to this function*/
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data);
curl_easy_setopt(curl_handle, CURLOPT_LOW_SPEED_TIME, RESPOND_TIME);
curl_easy_setopt(curl_handle, CURLOPT_LOW_SPEED_LIMIT, 30L);
/* set the progress function */
curl_easy_setopt(curl_handle, CURLOPT_PROGRESSFUNCTION, progress_func);
/* set the progress data */
curl_easy_setopt(curl_handle, CURLOPT_PROGRESSDATA, &my_dummy_data);
/* provide a buffer to store errors in */
curl_easy_setopt(curl_handle, CURLOPT_ERRORBUFFER, errbuff);
FILE *pagefile = fopen(path_to_where_I_want_to_store_the_file, "wb");
/* write the page body to this file handle */
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, pagefile);
/* get the file*/
CURLcode status = curl_easy_perform(curl_handle);
res = 0;
long response_code;
curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &response_code);
fclose(pagefile);
if (status != 0) {
log_warn("CURL ERROR %d: %s", status, errbuff);
response_code = -status;
}
/* cleanup curl stuff */
curl_easy_cleanup(curl_handle);
return NULL;
}
int main()
{
// sockfd = create a sockfd
// bind, listen
do {
// accept new connection
char *url;
// receive the url from client
pthread_t tid;
pthread_create(&tid, NULL, download_with_curl, url);
} while (1);
}
When I send a single download request, the code works fine. "Works fine" means that the md5sum values of the original file and the downloaded file are equal. However, when I send multiple requests to download multiple files, only the first file that is downloaded has the correct md5sum value. To be clear, if I send requests to download files A (200MB), B (5MB) and C (50MB) in that order, only file B is correctly downloaded because it is finished first. Files A and C will have incorrect md5sum values. Moreover, when I check the content of files A and C, it looks like curl just inserts random segments of data into them. If the original file content is
This is the content of a file
then the downloaded file is like
This is the #$%!##%#% content of $%(#(!)$()$%||##$%*&) a file
After spending two days debugging, I finally solved the problem (I hope). All I did was flush the data after calling fwrite. The write_data function now looks like this:
static size_t write_data(void *ptr, size_t size, size_t nmemb, void *stream)
{
size_t written = fwrite(ptr, size, nmemb, (FILE *)stream);
fflush((FILE *) stream);
return written;
}
I do not know whether this completely solves the problem or not. Could anyone explain why it behaves that way and give me a solution?
UPDATE 1
It seems to have something to do with fwrite()'s internal buffer. Changing fwrite(ptr, size, nmemb, stream) to write(fileno(stream), ptr, size * nmemb) seems to give the same result as using fflush().
UPDATE 2
Using libcurl's default write function (removing the CURLOPT_WRITEFUNCTION option) gives the same problem.
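One thing worth ruling out, independent of the fwrite buffering question: the program never calls curl_global_init(). When it is missing, the first curl_easy_init() call runs it implicitly from whichever thread gets there first, and curl_global_init() is documented as not thread-safe, so it must happen before any other thread uses libcurl. A minimal sketch of the fix:
int main(void)
{
/* must run exactly once, before any thread may touch libcurl */
curl_global_init(CURL_GLOBAL_ALL);
/* ... socket setup, accept loop, pthread_create() as before ... */
curl_global_cleanup(); /* only after every libcurl thread has finished */
return 0;
}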
I was able to write C code that receives a stream of tweets from the streaming API, but the code below produces no output. The code works when CURLOPT_URL is set to google.com or 9gag.com, so I guess the problem has something to do with the steady stream of tweets and the huge amount of data received. Could it be that the write_func callback, whose purpose is to print the response (stream), is not working, and that is why there is no output? I am thinking the callback is probably being overwhelmed by the tremendous stream the Twitter API sends. If that is the case, how should I write a proper write callback?
In case you wonder: I verified that the stream is being received, because my system monitor's network history shows received bytes rising whenever I execute the code.
Thanks!
The code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>
struct string {
char *ptr;
size_t len;
};
void init_string(struct string *s) {
s->len = 0;
s->ptr = malloc(s->len+1);
if (s->ptr == NULL) {
fprintf(stderr, "malloc() failed\n");
exit(EXIT_FAILURE);
}
s->ptr[0] = '\0';
}
size_t writefunc(void *ptr, size_t size, size_t nmemb, struct string *s)
{
size_t new_len = s->len + size*nmemb;
s->ptr = realloc(s->ptr, new_len+1);
if (s->ptr == NULL) {
fprintf(stderr, "realloc() failed\n");
exit(EXIT_FAILURE);
}
memcpy(s->ptr+s->len, ptr, size*nmemb);
s->ptr[new_len] = '\0';
s->len = new_len;
return size*nmemb;
}
int main(void)
{
CURL *curl;
CURLcode res;
curl = curl_easy_init();
if(curl) {
struct string s;
init_string(&s);
curl_easy_setopt(curl, CURLOPT_URL, "https://stream.twitter.com/1/statuses/sample.json");
curl_easy_setopt(curl, CURLOPT_USERPWD, "neilmarion:password_here");
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writefunc);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &s);
res = curl_easy_perform(curl);
printf("%s\n", s.ptr);
free(s.ptr);
/* always cleanup */
curl_easy_cleanup(curl);
}
return 0;
}
OK, I did some testing, and it appears that that URL just keeps sending data; it never completes. I killed it off after 15 MB. But if you put print statements in your callback, or use strace, you can see it is working properly. Your string s just keeps growing and growing.
So one solution would be to change your callback to print and re-initialise s once it reaches a certain size; otherwise the program will eventually run out of memory. Change your callback to:
size_t max_buffer = 10240; // 10K
size_t writefunc(void *ptr, size_t size, size_t nmemb, struct string *s)
{
size_t new_len = s->len + size*nmemb;
s->ptr = realloc(s->ptr, new_len+1);
if (s->ptr == NULL) {
fprintf(stderr, "realloc() failed\n");
exit(EXIT_FAILURE);
}
memcpy(s->ptr+s->len, ptr, size*nmemb);
s->ptr[new_len] = '\0';
s->len = new_len;
// Begin newly added code
if( s->len >= max_buffer )
{
printf("%s", s->ptr);
fflush( stdout );
free(s->ptr);
init_string( s );
}
// End newly added code
return size*nmemb;
}
And still keep the print at the end, to dump the last bit and the trailing newline. Now that you have a buffered solution, you could look at a more efficient implementation that doesn't need dynamically allocated memory.
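For example, since this callback only ever prints, it could skip buffering entirely and hand each chunk straight to stdout. A minimal sketch (fwrite rather than printf, because libcurl does not NUL-terminate the data it hands to the callback):
size_t writefunc(void *ptr, size_t size, size_t nmemb, struct string *s)
{
(void) s; /* unused in this variant */
size_t n = fwrite(ptr, size, nmemb, stdout);
fflush(stdout);
return n * size; /* bytes consumed; anything less aborts the transfer */
}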