I would like to create a C client that makes asynchronous API calls with lib curl and saves the responses, the calls are about a hundred at the same time. I have been looking for internet tutorials and examples for curl_multi_ * and curl_multi_socket with epoll for 4 days (I use linux) but they seem not to exist, and those few examples are not understandable to someone who is a beginner like me. Apparently I'm the only one interested in doing such a thing in C.
I also looked at the official documentation examples, but it uses a maximum of 2 connections at the same time and to do this declares two variables and calls curl_easy_init(), but the problem is that the requests made by the program are not a precise number so I cannot declare a number of variables a priori (even though it's not possible to declare 100 variables).
I found out this example of curl_multi_socket with epoll is difficult to understand and replicate for my case without an explanation of how it works.
Is there anyone who can give me a code example on how to use curl_multi_ * for multiple simultaneous connections to start with? it would be much appreciated.
EDIT:
after hours of research, I finally found an example that might be fit, the problem is that it crashes often and for various reasons
#define NUM_URLS 64
typedef struct data { // 24 / 24 Bytes
struct curl_slist * header;
char ** sub_match_json;
int nbr_sub_match;
int response_counter;
} data_t;
// list of the same URL repeated multiple times
// assume there are 64 url for example
static char *urls[] = {}
void make_header(data_t * data) {
//many curl_slist_append();
}
void init_data(data_t *data) {
data->sub_match_json = (char **)malloc(sizeof(char *) * NUM_URLS);
data->response_counter = 0;
data->nbr_sub_match = NUM_URLS;
make_header(data);
}
static size_t write_cb(void *response, size_t size, size_t nmemb, void *userp)
{
size_t realsize = size * nmemb;
data_t * data = (data_t *) userp;
data->sub_match_json[data->response_counter] = malloc(realsize + 1);
if(data->sub_match_json[data->response_counter] == NULL)
{
fprintf(stderr, "Memory allocation failed: %s\n", strerror(errno));
return 0; /* out of memory! */
}
memcpy(data->sub_match_json[data->response_counter], response, realsize);
data->sub_match_json[data->response_counter][realsize] = 0;
data->response_counter++;
return realsize;
}
static void add_transfer(CURLM *cm, int i, data_t *data)
{
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, 1<<23);
// curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
// curl_easy_setopt(curl, CURLOPT_TCP_FASTOPEN, 1L);
// curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1L);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)data);
curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, data->header);
curl_easy_setopt(curl, CURLOPT_HEADER, 1L);
curl_easy_setopt(curl, CURLOPT_URL, urls[i]);
curl_easy_setopt(curl, CURLOPT_PRIVATE, urls[i]);
curl_multi_add_handle(cm, curl);
}
int main(void)
{
CURLM *cm;
CURLMsg *msg;
data_t global_data;
unsigned int transfers = 0;
int msgs_left = -1;
int still_alive = 1;
curl_global_init(CURL_GLOBAL_ALL);
cm = curl_multi_init();
init_data(NULL, &global_data); // my function
/* Limit the amount of simultaneous connections curl should allow: */
curl_multi_setopt(cm, CURLMOPT_MAXCONNECTS, (long)MAX_PARALLEL);
for(transfers = 0; transfers < MAX_PARALLEL; transfers++)
add_transfer(cm, transfers, &global_data);
do {
curl_multi_perform(cm, &still_alive);
while((msg = curl_multi_info_read(cm, &msgs_left))) {
if(msg->msg == CURLMSG_DONE) {
char *url;
CURL *e = msg->easy_handle;
curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, &url);
fprintf(stderr, "R: %d - %s <%s>\n",
msg->data.result, curl_easy_strerror(msg->data.result), url);
curl_multi_remove_handle(cm, e);
curl_easy_cleanup(e);
}
else {
fprintf(stderr, "E: CURLMsg (%d)\n", msg->msg);
}
if(transfers < global_data.nbr_sub_match)
add_transfer(cm, transfers++, &global_data);
}
if(still_alive)
curl_multi_wait(cm, NULL, 0, 1000, NULL);
} while(still_alive || (transfers < NUM_URLS));
curl_multi_cleanup(cm);
curl_global_cleanup();
while (global_data.response_counter-- >= 0) {
printf("%s\n", global_data.sub_match_json[global_data.response_counter]);
}
return EXIT_SUCCESS;
}
Error:
api_calls(75984,0x100088580) malloc: Incorrect checksum for freed object 0x100604c30: probably modified after being freed.
Corrupt value: 0x600002931f10
api_calls(75984,0x100088580) malloc: *** set a breakpoint in malloc_error_break to debug
this is on curl_easy_cleanup(e);
Exception has occurred.
EXC_BAD_ACCESS (code=1, address=0x0)
otherwise, when no error occurs, in sub_match_json there are bytes and no char. Why this ?
Related
I wanted to check whether curl has any alternative like InternetReadFile which returns the content with size specified in the buffer size.
I have used:
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, Read_Cb);
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &ReadBuffer);
curl_easy_perform(curl_handle);
But my Read_Cb gets called back multiple time (which is documented behaviour) and that is fine.
I want curl_easy_perform to return when my buffer size is reached. I explored CURLOPT_BUFFERSIZE, but that doesn't seem to help here.
CURLE_WRITE_ERROR is a problem becuase it aborts the transfer. I could have returned something from my callback which will gracefully tell curl to return curl_easy_perform.
Does CURLOPT_RANGE help?
CURL *curl = curl_easy_init();
if (curl)
{
size_t from = 0, to = 1024;
char range[64];
snprintf(range, sizeof range, "%zu-%zu", from, to);
curl_easy_setopt(curl, CURLOPT_URL, "http://example.com");
curl_easy_setopt(curl, CURLOPT_RANGE, range);
curl_easy_perform(curl);
}
If the server supports range requests, use HTTP Range:
CURL* curl = curl_easy_init();
if (curl) {
char range[32];
const size_t size = 65536;
snprintf(range, sizeof(range), "0-%zu", size - 1);
curl_easy_setopt(curl, CURLOPT_URL, "http://httpbin.org/range/65536");
curl_easy_setopt(curl, CURLOPT_RANGE, range);
curl_easy_perform(curl);
}
See CURLOPT_RANGE for more details.
If the server does not support range requests, use a progress or a write function to terminate the request if the size is reached.
const size_t size = 65535;
struct memory {
char* response;
size_t size;
};
static size_t cb(void* data, size_t size, size_t nmemb, void* userp) {
size_t realsize = size * nmemb;
struct memory* mem = (struct memory*)userp;
if (mem->size + realsize > size)
realsize = size - mem->size;
char* ptr = realloc(mem->response, mem->size + realsize);
if (ptr == NULL)
return 0; /* out of memory! */
mem->response = ptr;
memcpy(&(mem->response[mem->size]), data, realsize);
mem->size += realsize;
/* if realsize < size * nmemb, this will cause the transfer to get
aborted and curl_easy_perform will return URLE_WRITE_ERROR */
return realsize;
}
struct memory chunk = {0};
CURL* curl = curl_easy_init();
if (curl) {
curl_easy_setopt(curl, CURLOPT_URL, "http://httpbin.org/stream-bytes/131072");
/* send all data to this function */
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, cb);
/* we pass our 'chunk' struct to the callback function */
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void*)&chunk);
curl_easy_perform(curl);
}
If URLE_WRITE_ERROR is not desired, you can use curl_multi_perform(multi, &running_handles) and remove handle from multi if size reached. See curl_multi_remove_handle for more details.
I am downloading file quite commonly with curl. However, the server does a tricky thing: it return non-200 code and still sends some data. The problem is that I have the HTTP code after the data are written, but I do not want to write anything if it is non-200. Does anyone know a way to do that without storing data on disk or memory?
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curlWriteHandler);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, ptr);
res = curl_easy_perform(curl);
if (res == CURLE_OK) {
return 0;
}
long response_code;
curl_easy_getinfo(curl_.get(), CURLINFO_RESPONSE_CODE, &response_code);
if (response_code != 200) {
return 0;
}
size_t curlWriteHandler(char* chars, size_t size, size_t nmemb, void* userp) {
// write to file
return size * nmemb;
}
Setting CURLOPT_FAILONERROR should do it for 4xx and 5xx errors.
When this option is used and an error is detected, it will cause the connection to get closed and CURLE_HTTP_RETURNED_ERROR is returned.
curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L);
Closing connection is not good for me, it is important to reuse one. Can you think about anything else?
Unfortunately I can't find a way to make CURLOPT_FAILONERROR not close the connection.
The other option is to make the write function aware of the response. Unfortunately the curl handle is not passed into the callback.
We could make the curl variable global. Or we can take advantage of the void *userdata option to the write callback and pass in a struct containing both the curl handle and the buffer.
Here's a rough sketch demonstrating how the write callback can get access to the response code and also save the response body.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <curl/curl.h>
typedef struct {
CURL *curl;
char *buf;
} curl_write_data;
size_t curlWriteHandler(char* chars, size_t size, size_t nmemb, void* userp) {
curl_write_data *curl_data = (curl_write_data*)userp;
long response_code;
curl_easy_getinfo(curl_data->curl, CURLINFO_RESPONSE_CODE, &response_code);
printf("Response: %ld\n", response_code);
// Now we can save if we like.
if( response_code < 300 ) {
curl_data->buf = malloc(size*(nmemb+1));
strcpy(curl_data->buf, chars);
strcat(curl_data->buf, "\0");
return size * nmemb;
}
else {
return 0;
}
}
int main() {
CURL *curl = curl_easy_init();
if(!curl) {
perror("Cant' init curl");
}
curl_write_data curl_data = { .curl = curl, .buf = NULL };
curl_easy_setopt(curl, CURLOPT_URL, "http://example.com/alsdfjalj");
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curlWriteHandler);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &curl_data);
curl_easy_perform(curl);
if( curl_data.buf ) {
puts(curl_data.buf);
}
}
I'm not sure if this is the best idea, its what I came up with.
I'm trying to play with libcurl SMTP and everything works fine with this example: http://curl.haxx.se/libcurl/c/smtp-mail.html
#include <stdio.h>
#include <string.h>
#include <curl/curl.h>
/* This is a simple example showing how to send mail using libcurl's SMTP
* capabilities. For an exmaple of using the multi interface please see
* smtp-multi.c.
*
* Note that this example requires libcurl 7.20.0 or above.
*/
#define FROM "<sender#example.org>"
#define TO "<addressee#example.net>"
#define CC "<info#example.org>"
static const char *payload_text[] = {
"Date: Mon, 29 Nov 2010 21:54:29 +1100\r\n",
"To: " TO "\r\n",
"From: " FROM "(Example User)\r\n",
"Cc: " CC "(Another example User)\r\n",
"Message-ID: <dcd7cb36-11db-487a-9f3a-e652a9458efd#rfcpedant.example.org>\r\n",
"Subject: SMTP example message\r\n",
"\r\n", /* empty line to divide headers from body, see RFC5322 */
"The body of the message starts here.\r\n",
"\r\n",
"It could be a lot of lines, could be MIME encoded, whatever.\r\n",
"Check RFC5322.\r\n",
NULL
};
struct upload_status {
int lines_read;
};
static size_t payload_source(void *ptr, size_t size, size_t nmemb, void *userp)
{
struct upload_status *upload_ctx = (struct upload_status *)userp;
const char *data;
if((size == 0) || (nmemb == 0) || ((size*nmemb) < 1)) {
return 0;
}
data = payload_text[upload_ctx->lines_read];
if(data) {
size_t len = strlen(data);
memcpy(ptr, data, len);
upload_ctx->lines_read++;
return len;
}
return 0;
}
int main(void)
{
CURL *curl;
CURLcode res = CURLE_OK;
struct curl_slist *recipients = NULL;
struct upload_status upload_ctx;
upload_ctx.lines_read = 0;
curl = curl_easy_init();
if(curl) {
/* This is the URL for your mailserver */
curl_easy_setopt(curl, CURLOPT_URL, "smtp://mail.example.com");
/* Note that this option isn't strictly required, omitting it will result in
* libcurl sending the MAIL FROM command with empty sender data. All
* autoresponses should have an empty reverse-path, and should be directed
* to the address in the reverse-path which triggered them. Otherwise, they
* could cause an endless loop. See RFC 5321 Section 4.5.5 for more details.
*/
curl_easy_setopt(curl, CURLOPT_MAIL_FROM, FROM);
/* Add two recipients, in this particular case they correspond to the
* To: and Cc: addressees in the header, but they could be any kind of
* recipient. */
recipients = curl_slist_append(recipients, TO);
recipients = curl_slist_append(recipients, CC);
curl_easy_setopt(curl, CURLOPT_MAIL_RCPT, recipients);
/* We're using a callback function to specify the payload (the headers and
* body of the message). You could just use the CURLOPT_READDATA option to
* specify a FILE pointer to read from. */
curl_easy_setopt(curl, CURLOPT_READFUNCTION, payload_source);
curl_easy_setopt(curl, CURLOPT_READDATA, &upload_ctx);
curl_easy_setopt(curl, CURLOPT_UPLOAD, 1L);
/* Send the message */
res = curl_easy_perform(curl);
/* Check for errors */
if(res != CURLE_OK)
fprintf(stderr, "curl_easy_perform() failed: %s\n",
curl_easy_strerror(res));
/* Free the list of recipients */
curl_slist_free_all(recipients);
/* curl won't send the QUIT command until you call cleanup, so you should be
* able to re-use this connection for additional messages (setting
* CURLOPT_MAIL_FROM and CURLOPT_MAIL_RCPT as required, and calling
* curl_easy_perform() again. It may not be a good idea to keep the
* connection open for a very long time though (more than a few minutes may
* result in the server timing out the connection), and you do want to clean
* up in the end.
*/
curl_easy_cleanup(curl);
}
return (int)res;
}
The email which I send looks like this:
Return-Path: ...
Received: from Hostname (ip)
...
Date:...
Subject:...
...
But how could I change my own hostname from libcurl. Is it possible to send it in the header or something like this? Or I only could change it by adding the lines in /etc/hostname file?
In my OS X application, I've been using NSURLRequest for quite a while to send an HTTP request to a server and receive the response.
Now I need to migrate this code to use libcurl instead to be able to run it on Windows and Mac.
For some reason, I can't get it to work...
Here's what I'm trying to do:
sending a block to the server using HTTP post
Server evaluates block and sends response back
receiving response, evaluating, proceeding ...
When the block that is sent along with the request is valid, an "application/octet-stream" data block is sent back to the requesting application right after. If the file is not valid, a return code 403 is returned.
When I try to put the post request URL and the data block directly in the browser, it works.
If the data block is valid, a file will be downloaded, if not: a 403 page is shown.
My libcurl code looks like this:
curl_global_init(CURL_GLOBAL_ALL);
curl = curl_easy_init();
if (curl)
{
struct url_data data;
data.size = 0;
data.data = (char*) malloc(4096); /* reasonable size initial buffer */
if(NULL == data.data)
{
fprintf(stderr, "Failed to allocate memory.\n");
return NULL;
}
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
curl_easy_setopt(curl, CURLOPT_URL, "<my-url>");
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, [postData cStringUsingEncoding:NSASCIIStringEncoding]);
curl_slist_append(headerlist, "Content-Type: application/x-www-form-urlencoded");
curl_slist_append(headerlist, [length cStringUsingEncoding:NSASCIIStringEncoding]);
curl_easy_setopt(curl, CURLOPT_HEADER, headerlist);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &data);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0");
res = curl_easy_perform(curl);
if(res != CURLE_OK)
{
fprintf(stderr, "curl_easy_perform() failed: %s\n",
curl_easy_strerror(res));
}
curl_easy_cleanup(curl);
}
curl_global_cleanup();
}
And my write function looks like this:
struct url_data
{
size_t size;
char* data;
};
size_t write_data(void *ptr, size_t size, size_t nmemb, struct url_data *data)
{
size_t index = data->size;
size_t n = (size * nmemb);
char* tmp;
data->size += (size * nmemb);
fprintf(stderr, "data at %p size=%ld nmemb=%ld\n", ptr, size, nmemb);
tmp = (char*) realloc(data->data, data->size + 1); /* +1 for '\0' */
memcpy((data->data + index), ptr, n);
data->data[data->size] = '\0';
return size * nmemb;
}
I am receiving the right return codes and also the 403 page in case the block is not valid. In case it's valid, I'm not receiving the response block. The write function is never being called. The connection is always closed immediately.
Can anyone tell me what I'm doing wrong?
Here's a part of the log output of libcurl:
* Adding handle: conn: 0x2a9ee00
* Adding handle: send: 0
* Adding handle: recv: 0
* Curl_addHandleToPipeline: length: 1
* - Conn 0 (0x2a9ee00) send_pipe: 1, recv_pipe: 0
* About to connect() to <my-url> port 80 (#0)
* Trying <my-url-ip>...
* Connected to <my-url> (<my-url-ip>) port 80 (#0)
Does this recv-pipe cause the problem maybe?
Should it be 1?
I'm curious to hear your thoughts! Thanks in advance!
You should pass your custom headers with curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerlist); and not using CURLOPT_HEADER that expects a long as parameter (0 or 1 to not include/include the header in the body output).
The only problem I found is that you didn't saved the new address from realloc(). It will surely crash, but the write_data() should have been called at least once.
How do you know it is never being called?
Please try this write_data() instead:
size_t write_data(void *ptr, size_t size, size_t nmemb, struct url_data *data)
{
size_t index = data->size;
size_t n = (size * nmemb);
char* tmp;
data->size += (size * nmemb);
fprintf(stderr, "data at %p size=%ld nmemb=%ld\n", ptr, size, nmemb);
tmp = (char*) realloc(data->data, data->size + 1); /* +1 for '\0' */
if (tmp == NULL) {
fputs("Error (re)allocating memory", stderr);
return 0;
}
data->data = tmp;
memcpy((data->data + index), ptr, n);
data->data[data->size] = '\0';
return size * nmemb;
}
Here I implemented code for file download from server. its working fine.
Now I want to make my own progress bar function which calculates some data like remaining seconds data Rate per second etc.
So from here I found one way to use curl progress bar option. how we can enable this option.
I completely done with this.
I put my code below. here in this code my_progress_func calls frequently as per curl library time interval. I want to change this interval time and make it to 1 second. is it possible in curl library using to set some options for curl library?
I want to call this my_progress_func function after every 1 second.
Code :
#include <stdio.h>
#include <curl/curl.h>
long test =0;
struct FtpFile {
const char *filename;
FILE *stream;
long iAppend;
};
static size_t my_fwrite(void *buffer, size_t size, size_t nmemb, void *stream)
{
struct FtpFile *out=(struct FtpFile *)stream;
if(out && !out->stream) {
/* open file for writing */
out->stream=fopen(out->filename, out->iAppend ? "ab":"wb");
if(!out->stream)
return -1; /* failure, can't open file to write */
}
out->iAppend += nmemb;
return fwrite(buffer, size, nmemb, out->stream);
}
int my_progress_func(void *bar,
double t, /* dltotal */
double d, /* dlnow */
double ultotal,
double ulnow)
{
printf("%f : %f \n", d, t);
return 0;
}
int main(void)
{
CURL *curl;
CURLcode res;
int c;
struct FtpFile ftpfile={
"dev.zip", /* name to store the file as if succesful */
NULL,
};
curl_global_init(CURL_GLOBAL_DEFAULT);
curl = curl_easy_init();
if(curl) {
curl_easy_setopt(curl, CURLOPT_URL,
"sftp://root:xyz_#192.170.10.1/mnt/xyz.tar");
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 120L);
/* Define our callback to get called when there's data to be written */
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, my_fwrite);
/* Set a pointer to our struct to pass to the callback */
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &ftpfile);
curl_easy_setopt(curl, CURLOPT_FTPPORT, "-");
/* Switch on full protocol/debug output */
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION, my_progress_func);
res = curl_easy_perform(curl);
printf("res is %d\n, data get %ld\n", res, ftpfile.iAppend);
///Retry upto 100 times it timeout or connection drop occur
for (c = 0; (res != CURLE_OK) && (c < 100); c++) {
curl_easy_setopt(curl, CURLOPT_RESUME_FROM , ftpfile.iAppend);
res = curl_easy_perform(curl);
if(res == CURLE_OK) c =0;
printf("%d res is %d\n, data get %ld\n",c, res, ftpfile.iAppend);
}
/* always cleanup */
curl_easy_cleanup(curl);
}
if(ftpfile.stream)
fclose(ftpfile.stream); /* close the local file */
curl_global_cleanup();
return 0;
}
According to the curl documentation:
http://curl.haxx.se/libcurl/c/curl_easy_setopt.html
Function pointer that should match the curl_progress_callback
prototype found in . This function gets called by libcurl
instead of its internal equivalent with a frequent interval during
operation (roughly once per second or sooner) no matter if data is
being transfered or not. Unknown/unused argument values passed to the
callback will be set to zero (like if you only download data, the
upload size will remain 0). Returning a non-zero value from this
callback will cause libcurl to abort the transfer and return
CURLE_ABORTED_BY_CALLBACK.
If it's calling too frequently then you can use time() and a static var to limit this, something like this:
static time_t prevtime;
time_t currtime;
double dif;
static int first = 1;
if(first) {
time(&prevtime);
first = 0;
}
time(&currtime);
dif = difftime(currtime, prevtime);
if(dif < 1.0)
return;
prevtime = currtime;
Obviously, you run the risk that curl might not call this function again for fully another second.