Hi, I'm a newbie with pthreads and HTTP requests in C, and I wanted to know if there is a way to make multiple asynchronous HTTP calls in C, possibly at the same time. I say "at the same time" because I have a list of about 200-300 URLs and want to parallelize the requests across multiple threads (since asynchronous I/O alone runs on only one thread), passing a sublist to each thread and getting the responses as fast as possible while avoiding a bottleneck. I haven't found much for C on the internet; I only found curl_multi_*, but I didn't quite understand how to use it. So I thought about writing something like this, but it doesn't seem to solve the problem. Does anyone have any tips, or a good tutorial for this kind of thing in C?
void *make_req(void *arguments) {
CURL *curl;
CURLcode res;
memory_t chunk;
data_t * data = (data_t*)arguments;
static int i = -1;
chunk.response = malloc(1);
chunk.size = 0;
curl = curl_easy_init();
while (i < data->nbr_sub_match) {
pthread_mutex_lock(&lock);
i++;
pthread_mutex_unlock(&lock);
curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, 1<<23);
// curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip, deflate");
curl_easy_setopt(curl, CURLOPT_TCP_FASTOPEN, 1L);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, data->header);
curl_easy_setopt(curl, CURLOPT_URL, data->sub_match_urls[i]);
curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1L);
/* send all data to this function */
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, cb);
/* we pass our 'chunk' struct to the callback function */
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&chunk);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0");
/* get it! */
res = curl_easy_perform(curl);
/* check for errors */
if(res != CURLE_OK) {
fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
}
pthread_mutex_lock(&lock);
data->sub_match_json[data->response_counter++] = chunk.response;
pthread_mutex_unlock(&lock);
}
/* cleanup curl stuff */
curl_easy_cleanup(curl);
i = -1;
// return chunk.response;
return NULL;
}
/*
 * Spawns data->nbr_threads workers running make_req() on the shared `data`,
 * waits for all of them, then releases the mutex and libcurl global state.
 *
 * Exits the process if a thread cannot be created (unchanged behaviour).
 * Returns without starting anything if nbr_threads is not positive or the
 * mutex cannot be initialised.
 */
void start_threads(data_t *data) {
    int err;
    int started = 0;

    /* A variable-length array with a non-positive size is undefined. */
    if (data->nbr_threads <= 0) {
        return;
    }
    pthread_t thread[data->nbr_threads];

    curl_global_init(CURL_GLOBAL_ALL);

    /* pthread functions return the error number; they do not set errno. */
    err = pthread_mutex_init(&lock, NULL);
    if (err != 0) {
        fprintf(stderr, "failed to initialize mutex: %s\n", strerror(err));
        curl_global_cleanup();
        return;
    }

    while (started < data->nbr_threads) {
        err = pthread_create(&thread[started], NULL, make_req, (void *)data);
        if (err != 0) {
            fprintf(stderr, "Error: impossible make new thread %s\n", strerror(err));
            exit(1);
        }
        started++;
    }

    while (--started >= 0) {
        pthread_join(thread[started], NULL);
    }

    /* Safe only now: no worker can still be holding or waiting on the lock. */
    pthread_mutex_destroy(&lock);
    curl_global_cleanup();
}
Related
I would like to create a C client that makes asynchronous API calls with libcurl and saves the responses; about a hundred calls are made at the same time. I have spent four days looking online for tutorials and examples of curl_multi_* and curl_multi_socket with epoll (I use Linux), but there seem to be very few, and the ones I found are hard to follow for a beginner like me. Apparently I'm the only one interested in doing such a thing in C.
I also looked at the examples in the official documentation, but they use at most two simultaneous connections, declaring two variables and calling curl_easy_init() for each. My problem is that the number of requests my program makes is not known in advance, so I cannot declare a fixed number of variables up front (and declaring 100 variables would not be practical anyway).
I found out this example of curl_multi_socket with epoll is difficult to understand and replicate for my case without an explanation of how it works.
Can anyone give me a code example showing how to use curl_multi_* for multiple simultaneous connections, to get me started? It would be much appreciated.
EDIT:
After hours of research, I finally found an example that might fit. The problem is that it crashes often, and for various reasons:
#define NUM_URLS 64
/* Shared state for one batch of transfers. */
typedef struct data { // 24 / 24 Bytes
struct curl_slist * header;  /* request headers, handed to CURLOPT_HTTPHEADER */
char ** sub_match_json;      /* array of response buffers; init_data() allocates NUM_URLS slots */
int nbr_sub_match;           /* number of URLs to fetch; init_data() sets it to NUM_URLS */
int response_counter;        /* index of the next free slot in sub_match_json */
} data_t;
// list of the same URL repeated multiple times
// assume there are 64 url for example
static char *urls[] = {}
/*
 * Builds the request header list for `data`. The body was elided in the
 * post; presumably it fills data->header through repeated
 * curl_slist_append() calls — TODO confirm against the real implementation.
 */
void make_header(data_t * data) {
//many curl_slist_append();
}
/*
 * Prepares `data` for a batch of NUM_URLS transfers: allocates the table of
 * response pointers (all NULL), resets the counters and builds the headers.
 *
 * On allocation failure a message is printed, data->sub_match_json is NULL
 * and data->nbr_sub_match is 0, so callers can detect the failure.
 */
void init_data(data_t *data) {
    /* Start from an empty list so make_header() can append safely. */
    data->header = NULL;
    data->response_counter = 0;
    data->nbr_sub_match = 0;

    data->sub_match_json = calloc(NUM_URLS, sizeof *data->sub_match_json);
    if (data->sub_match_json == NULL) {
        fprintf(stderr, "Memory allocation failed: %s\n", strerror(errno));
        return;
    }

    data->nbr_sub_match = NUM_URLS;
    make_header(data);
}
static size_t write_cb(void *response, size_t size, size_t nmemb, void *userp)
{
size_t realsize = size * nmemb;
data_t * data = (data_t *) userp;
data->sub_match_json[data->response_counter] = malloc(realsize + 1);
if(data->sub_match_json[data->response_counter] == NULL)
{
fprintf(stderr, "Memory allocation failed: %s\n", strerror(errno));
return 0; /* out of memory! */
}
memcpy(data->sub_match_json[data->response_counter], response, realsize);
data->sub_match_json[data->response_counter][realsize] = 0;
data->response_counter++;
return realsize;
}
static void add_transfer(CURLM *cm, int i, data_t *data)
{
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, 1<<23);
// curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
// curl_easy_setopt(curl, CURLOPT_TCP_FASTOPEN, 1L);
// curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1L);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)data);
curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, data->header);
curl_easy_setopt(curl, CURLOPT_HEADER, 1L);
curl_easy_setopt(curl, CURLOPT_URL, urls[i]);
curl_easy_setopt(curl, CURLOPT_PRIVATE, urls[i]);
curl_multi_add_handle(cm, curl);
}
int main(void)
{
CURLM *cm;
CURLMsg *msg;
data_t global_data;
unsigned int transfers = 0;
int msgs_left = -1;
int still_alive = 1;
curl_global_init(CURL_GLOBAL_ALL);
cm = curl_multi_init();
init_data(NULL, &global_data); // my function
/* Limit the amount of simultaneous connections curl should allow: */
curl_multi_setopt(cm, CURLMOPT_MAXCONNECTS, (long)MAX_PARALLEL);
for(transfers = 0; transfers < MAX_PARALLEL; transfers++)
add_transfer(cm, transfers, &global_data);
do {
curl_multi_perform(cm, &still_alive);
while((msg = curl_multi_info_read(cm, &msgs_left))) {
if(msg->msg == CURLMSG_DONE) {
char *url;
CURL *e = msg->easy_handle;
curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, &url);
fprintf(stderr, "R: %d - %s <%s>\n",
msg->data.result, curl_easy_strerror(msg->data.result), url);
curl_multi_remove_handle(cm, e);
curl_easy_cleanup(e);
}
else {
fprintf(stderr, "E: CURLMsg (%d)\n", msg->msg);
}
if(transfers < global_data.nbr_sub_match)
add_transfer(cm, transfers++, &global_data);
}
if(still_alive)
curl_multi_wait(cm, NULL, 0, 1000, NULL);
} while(still_alive || (transfers < NUM_URLS));
curl_multi_cleanup(cm);
curl_global_cleanup();
while (global_data.response_counter-- >= 0) {
printf("%s\n", global_data.sub_match_json[global_data.response_counter]);
}
return EXIT_SUCCESS;
}
Error:
api_calls(75984,0x100088580) malloc: Incorrect checksum for freed object 0x100604c30: probably modified after being freed.
Corrupt value: 0x600002931f10
api_calls(75984,0x100088580) malloc: *** set a breakpoint in malloc_error_break to debug
this is on curl_easy_cleanup(e);
Exception has occurred.
EXC_BAD_ACCESS (code=1, address=0x0)
Otherwise, when no error occurs, sub_match_json contains raw bytes rather than readable characters. Why is that?
I'm using Curl library to create a simple C code with MSVC to download a file from a URL.
The problem is that if the connection breaks in the middle of a download, my code freezes and the unfinished file is not removed from the directory.
What I want is if the download failed the program must retry the connection or remove the unfinished file and then try again. I prefer to use C libraries rather than C++ libs. Here is the code I am using:
//lib for curl
// CURL_STATICLIB only takes effect if it is defined before the header is included.
#define CURL_STATICLIB
#include <curl/curl.h>
// Downloads `url` into the file at `file_path`.
//
// Returns true when the transfer succeeded and the file was written and
// closed without error. On any failure the partial file is removed and
// false is returned.
bool downloader3(string url, string file_path) {
    CURL *curl = curl_easy_init();
    if (!curl) {
        printf("Downlaod Failed!\r\n");
        return false;
    }

    FILE *fp = fopen(file_path.c_str(), "wb");
    if (!fp) {
        curl_easy_cleanup(curl);
        printf("Downlaod Failed!\r\n");
        return false;
    }

    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
    CURLcode res = curl_easy_perform(curl);

    // Query statistics while the handle is still alive, and keep the
    // transfer result in `res` instead of overwriting it.
    double val = 0.0;
    CURLcode info = curl_easy_getinfo(curl, CURLINFO_SPEED_DOWNLOAD, &val);
    if ((CURLE_OK == info) && (val > 0))
        printf("Average download speed: %0.3f kbyte/sec.\n", val / 1024);

    //always cleanup
    curl_easy_cleanup(curl);
    // A failed close means buffered data may not have reached the disk.
    bool closed = (fclose(fp) == 0);

    if (res == CURLE_OK && closed) {
        printf("Download Successful!\r\n");
        return true;
    }

    printf("Downlaod Failed!\r\n");
    remove(file_path.c_str()); //remove the temp file
    return false;
}
EDIT---
Thanks to Ring Ø's answer, I modified the code, but I am still looking for a resume capability that can continue the download of an incomplete file.
// Downloads `url` into `file_path`, retrying up to three times.
//
// Each attempt restarts the download from scratch ("wb" truncates the file)
// and is limited to 10 seconds in total. Returns true as soon as one attempt
// succeeds; after the last failed attempt the partial file is removed and
// false is returned.
bool downloader3(string url, string file_path) {
    const int max_tries = 3;
    bool ok = false;

    for (int attempt = 1; attempt <= max_tries && !ok; ++attempt) {
        printf("Doing try # %d\r\n", attempt);

        CURL *curl = curl_easy_init();
        if (!curl)
            continue;

        FILE *fp = fopen(file_path.c_str(), "wb");
        if (!fp) {
            printf("Unable to open %s for writing\r\n", file_path.c_str());
            curl_easy_cleanup(curl);
            continue;
        }

        curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 10L); // whole transfer must finish within 10 seconds
        CURLcode res = curl_easy_perform(curl);

        //always cleanup
        curl_easy_cleanup(curl);
        // A failed close means buffered data may not have reached the disk.
        bool closed = (fclose(fp) == 0);

        if (res == CURLE_OK && closed) {
            printf("Download Successful!\r\n");
            ok = true;
        }
    }

    if (ok)
        return true;

    printf("Download Failed!\r\n");
    printf("file path is: %s", file_path.c_str());
    Sleep(5000);
    int status = remove(file_path.c_str()); //remove the unfinished file
    if (status == 0)
        // %s needs a C string; passing the std::string object is undefined.
        printf("%s file deleted successfully.\n", file_path.c_str());
    else
        printf("Unable to delete the file\n");
    return false;
}
You could set a timeout option
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30L); // 30 seconds
if the operation is not done within 30 seconds, the timeout is triggered. Then check the result value, in a while loop for instance
/* Sketch (not compilable: "..." stands for the real arguments). Meant to sit
 * inside a retry loop: leave the loop on success, otherwise clean up and
 * try again. */
res = curl_easy_perform( ... );
if (res == CURLE_OK) {
break;
}
// delete file
// keep retrying (add a counter if necessary)
See also the curl page.
Loop example
/* Sketch of a bounded retry loop (not compilable: "..." marks elided code).
 * maxtries counts the remaining attempts. */
int maxtries = 5;
do {
curl = curl_easy_init();
if (curl) {
...
res = curl_easy_perform( ... );
if (res == CURLE_OK) {
break;
}
// delete file, curl cleanup...
}
} while ( --maxtries );
/* break skips the decrement, so a non-zero count here means an attempt
 * succeeded; zero means every attempt failed. */
if (maxtries) { // was OK
// clean curl / delete file?
}
This is not the ideal solution, as you said, the download may take more or less time. This (should) prevent a never ending program, provided the timeout is big enough.
Curl library was known to have some problems in case of erratic connection - there could be something better nowadays, please try the latest stable build.
If you don't get a better answer within a few days, try to add a "Bounty" of 50 rep to attract more attention.
What you are looking for is the RESUME_FROM feature. To use it you must know the byte offset from which to resume the download. The example below is an upload, but the same setopt technique applies to downloads. Here is the example usage from the curl website:
/* Resumes an FTP upload at byte offset 200 using CURLOPT_RESUME_FROM.
 * size_of_file is not declared in this excerpt; the handle is not cleaned
 * up here either. */
CURL *curl = curl_easy_init();
if(curl) {
curl_easy_setopt(curl, CURLOPT_URL, "ftp://example.com");
/* resume upload at byte index 200 */
curl_easy_setopt(curl, CURLOPT_RESUME_FROM, 200L);
/* ask for upload */
curl_easy_setopt(curl, CURLOPT_UPLOAD, 1L);
/* set total data amount to expect */
curl_easy_setopt(curl, CURLOPT_INFILESIZE, size_of_file);
/* Perform the request */
curl_easy_perform(curl);
}
source: https://curl.haxx.se/libcurl/c/CURLOPT_RESUME_FROM.html
Can anyone help me? I'm new to programming. How can I make str and str2 comparable (with code, if possible)? Or is there any code that can find a specific word inside a CURLcode?
/*
 * Fetches a page, following redirects, then searches the textual description
 * of the transfer result for the word "nogi" and prints that description.
 *
 * Note: `str` is only libcurl's message for the result code (for example
 * "No error"); it is not the downloaded page. With no write callback set,
 * the page body goes to stdout.
 */
int main(void) {
    CURL *curl;
    CURLcode res;

    curl = curl_easy_init();
    if (curl) {
        curl_easy_setopt(curl, CURLOPT_URL, "mabinogi.nexon.net");
        /* example.com is redirected, so we tell libcurl to follow redirection*/
        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
        /* Perform the request, res will get the return code */
        res = curl_easy_perform(curl);
        /* Check for errors */
        if (res != CURLE_OK) {
            fprintf(stderr, "curl_easy_perform() failed: %s\n",
                    curl_easy_strerror(res));
        }

        const char *str = curl_easy_strerror(res);
        const char *str2 = "nogi";
        /* strstr returns a pointer to the first match, or NULL if absent. */
        const char *ff = strstr(str, str2);
        if (ff) {
            printf("word found!");
        }
        /* Print through "%s": *str is a single char, not a format string. */
        printf("%s", str);

        /* always cleanup */
        curl_easy_cleanup(curl);
    }
    return 0;
}
In a C project I want to reuse curl easy handles. The flow of program is like:
Client --> C Application --> Call URL1, do something, Call URL2, do something, Call URL3...
In short, for each client request, the same set of URLs are called.
I initially create the curl easy handle when the program starts. The main program creates a configurable number of child, so each child gets its own easy handle.
/*
 * Per-child start-up hook: records the child's pid, initialises libcurl,
 * creates the easy handle stored in `curl` and issues one HEAD request to
 * URL so a connection exists before real requests arrive.
 *
 * rank: child number, used only in log messages.
 * Returns 0 on success, -1 if the easy handle could not be created.
 */
static int child_init(int rank) {
    LM_NOTICE("init_child [%d] pid [%d]\n", rank, getpid());
    pid = my_pid();

    curl_global_init(CURL_GLOBAL_ALL);
    curl = curl_easy_init();

    if (curl != NULL) {
        /* Warm-up request; its status code is deliberately not inspected. */
        curl_head(URL);
        return 0;
    }

    LM_ERR("Child %d: Curl initialization failed.\n", rank);
    return -1;
}
I created a C file where the functions are created to handle GET/POST/PUT etc requests:
/*
 * Sends a HEAD request to `url` on the shared easy handle `curl`.
 *
 * Returns the HTTP status code, 0 if no response code was received, or -1
 * if `url` is NULL. The handle's options are reset before returning so the
 * next request starts from a clean state.
 */
int curl_head(const char* url) {
    if (!url) {
        LM_ERR("URL not provided. Returning with error.\n");
        return -1;
    }

    CURLcode res;
    /* CURLINFO_RESPONSE_CODE writes a long; an int here would be overrun
     * on platforms where long is wider than int. */
    long http_code = 0;
    struct curl_slist *headers = NULL;

    headers = curl_slist_append(headers, "charsets: utf-8");

    /* set URL */
    curl_easy_setopt(curl, CURLOPT_URL, url);
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "HEAD");
    curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);

    res = curl_easy_perform(curl);
    if (res != CURLE_OK) {
        LM_ERR("curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
    }

    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
    LM_DBG("HTTP return CODE %d\n", (int)http_code);

    /* The list must outlive the transfer, so it is freed only now. */
    curl_slist_free_all(headers);
    curl_easy_reset(curl);
    return (int)http_code;
}
/*
 * Sends `postdata` as a JSON POST body to `url` on the shared easy handle
 * `curl`.
 *
 * postdata: NUL-terminated body; libcurl does not copy it, so it must stay
 *           valid until this function returns.
 * Returns the HTTP status code, 0 if no response code was received, or -1
 * if `url` is NULL. The handle's options are reset before returning.
 */
int curl_post(const char* url, char *postdata) {
    static const char *const header_lines[] = {
        "Accept: application/json",
        "Content-Type: application/json",
        "charsets: utf-8",
    };

    if (!url) {
        LM_ERR("URL not provided. Returning with error.\n");
        return -1;
    }

    CURLcode res;
    /* CURLINFO_RESPONSE_CODE writes a long; an int here would be overrun
     * on platforms where long is wider than int. */
    long http_code = 0;
    struct curl_slist *headers = NULL;

    for (size_t k = 0; k < sizeof header_lines / sizeof header_lines[0]; k++) {
        /* Append through a temporary: on failure curl_slist_append returns
         * NULL and the list built so far would otherwise be lost. */
        struct curl_slist *grown = curl_slist_append(headers, header_lines[k]);
        if (grown == NULL) {
            LM_ERR("curl_slist_append() failed for header '%s'\n", header_lines[k]);
            break;
        }
        headers = grown;
    }

    /* set URL */
    curl_easy_setopt(curl, CURLOPT_URL, url);
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, postdata);
    curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcrp/0.1");

    res = curl_easy_perform(curl);
    if (res != CURLE_OK) {
        LM_ERR("curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
    }

    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);

    /* The list must outlive the transfer, so it is freed only now. */
    curl_slist_free_all(headers);
    curl_easy_reset(curl);
    return (int)http_code;
}
When somewhere in the program I want to call the WS I call the desired function as curl_post(url).
Am I doing this correctly or are there any flaws in this implementation?
I am a beginner in both C programming and libcurl and writing a program to fetch 1000 data values from a website. The website provides a job number and is redirected into another page for the results. Since, the code I have written is almost 500 lines, I am giving a general flow of the program and a short code which I think is the problematic area:
/*
 * Excerpt (incomplete: "------" marks elided code and the for loop is not
 * closed here). For each of 1000 rows: fetch `url` following redirects, read
 * the effective (post-redirect) URL, cut an 18-character job id out of it,
 * then fetch that URL again into the growable string `s` and print it.
 */
for(row=0;row<1000;row++)
{
------
url = "http://example.com";
/* NOTE(review): global init runs on every iteration; libcurl expects it once
 * per program, paired with curl_global_cleanup() — confirm and hoist. */
curl_global_init(CURL_GLOBAL_ALL);
curlHandle = curl_easy_init();
if(curlHandle)
{
curl_easy_setopt(curlHandle, CURLOPT_TIMEOUT, 1800);
curl_easy_setopt(curlHandle, CURLOPT_ERRORBUFFER, curlErrStr);
curl_easy_setopt(curlHandle, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curlHandle, CURLOPT_URL, url);
curl_easy_setopt(curlHandle, CURLOPT_LOW_SPEED_LIMIT, dl_lowspeed_bytes);
curl_easy_setopt(curlHandle, CURLOPT_LOW_SPEED_TIME, dl_lowspeed_time);
curl_easy_setopt(curlHandle, CURLOPT_VERBOSE, 1L);
/* NOTE(review): in this excerpt url was assigned a string literal, and
 * freeing a literal is undefined; presumably the real code allocates it —
 * confirm. */
free(url);
curlErr = curl_easy_perform(curlHandle);
if(curlErr != CURLE_OK)
{
fprintf(stderr, "curl_easy_perform() failed: %s\n",curl_easy_strerror(curlErr));
}
else
{
curlErr = curl_easy_getinfo(curlHandle, CURLINFO_EFFECTIVE_URL, &url_new);
if((CURLE_OK == curlErr) && url_new)
{
/* Copies up to 18 characters starting at offset 28 of the effective URL;
 * assumes the URL is at least 28 characters long — TODO confirm. The
 * newline written here is stripped again just below. */
sprintf(job,"%.*s\n", 18, url_new + 28);
if((ptr1 = strchr(job, '\n')) != NULL)
*ptr1 = '\0';
init_string(&s);
curl_easy_setopt(curlHandle, CURLOPT_TIMEOUT, 1800 );
/* NOTE(review): url_new points at memory owned by the handle. Handing it
 * straight back to CURLOPT_URL may make libcurl read a string it has just
 * released, which would fit the empty host name in the verbose log. Copy
 * url_new first — confirm against the CURLINFO_EFFECTIVE_URL docs. */
curl_easy_setopt(curlHandle, CURLOPT_URL, url_new);
curl_easy_setopt(curlHandle, CURLOPT_WRITEFUNCTION, writefunc);
curl_easy_setopt(curlHandle, CURLOPT_WRITEDATA, &s);
/* curlErr1 is not checked before the result is printed. */
curlErr1 = curl_easy_perform(curlHandle);
printf("###### %lu\t%s\n",strlen(s.ptr),s.ptr);
free(s.ptr);
}
/* The handle is cleaned up only on this success branch; after a failed
 * first perform it is never released. */
curl_easy_cleanup(curlHandle);
}
}
The functions are:
/* Growable, NUL-terminated byte buffer filled by writefunc(). */
struct string
{
char *ptr;   /* heap buffer holding len bytes plus a terminating '\0' */
size_t len;  /* number of bytes stored, not counting the terminator */
};
/*
 * Makes `a` an empty string: a one-byte heap buffer holding only '\0' and a
 * length of 0. Terminates the process if the allocation fails.
 */
void init_string(struct string *a)
{
    char *empty = malloc(1);

    if (!empty) {
        fprintf(stderr, "malloc() failed\n");
        exit(EXIT_FAILURE);
    }

    *empty = '\0';
    a->ptr = empty;
    a->len = 0;
}
/*
 * Write callback: appends the size*nmemb bytes at `ptr` to the string `a`,
 * growing its buffer and keeping it NUL-terminated.
 *
 * Returns the number of bytes consumed (always size*nmemb). Terminates the
 * process if the buffer cannot be grown.
 */
size_t writefunc(void *ptr, size_t size, size_t nmemb, struct string *a)
{
    const size_t incoming = size * nmemb;
    const size_t total = a->len + incoming;

    a->ptr = realloc(a->ptr, total + 1);
    if (!a->ptr) {
        fprintf(stderr, "realloc() failed\n");
        exit(EXIT_FAILURE);
    }

    /* New data goes right after the bytes already stored. */
    memcpy(&a->ptr[a->len], ptr, incoming);
    a->len = total;
    a->ptr[total] = '\0';

    return incoming;
}
The program shows no error of any kind. But out of the 1000 data, almost 50% couldn't be fetched due to curl_easy_perform() failed: Timeout was reached; and 20% of them have the output of the line strlen(s.ptr),s.ptr => 0. The rest are fetched correctly.
The verbose option for the zero output gave the following:
Connection #0 to host www.example.com left intact
getaddrinfo(3) failed for :80
Couldn't resolve host ''
Closing connection #1
Couldn't resolve host name
0
Please suggest the possible errors in the program.
Here is how I would fetch data using cURL
/* The single easy handle shared by initCURL() and endCurl(). */
static CURL *curl = NULL;

/*
 * Initialises libcurl and creates the shared easy handle with the options
 * common to every fetch.
 *
 * Returns the handle, or NULL if it could not be created (a message is
 * printed to stderr in that case).
 */
CURL *initCURL(void)
{
    curl_global_init(CURL_GLOBAL_DEFAULT);
    curl = curl_easy_init();
    if(curl)
    {
        // now set all the desired options
        curl_easy_setopt(curl, CURLOPT_URL, "http://example.com");
        /* example.com is redirected, so we tell libcurl to follow redirection */
        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
        // etc
    }
    else
    { // else cURL object creation failed
        fprintf(stderr, "curl_easy_init() failed\n");
    }
    /* The function is declared to return the handle, so actually return it. */
    return curl;
}
/*
 * Releases the shared easy handle and the libcurl global state set up by
 * initCURL(). Safe to call more than once.
 */
void endCurl(void)
{
    // and then when all done with the cURL object,
    // cleanup
    if(curl)
    {
        curl_easy_cleanup(curl);
        /* Drop the stale pointer so later calls cannot reuse a dead handle. */
        curl = NULL;
        /* Pairs with the curl_global_init() call in initCURL(). */
        curl_global_cleanup();
    }
}
/*
 * Runs one transfer on `curl` with whatever options are currently set.
 *
 * Returns the libcurl result code; on failure the matching error text is
 * also written to stderr.
 */
CURLcode execCurl( CURL *curl )
{
    /* Perform this request, for each fetch */
    const CURLcode outcome = curl_easy_perform(curl);

    if(outcome == CURLE_OK)
    {
        return outcome;
    }

    fprintf(stderr, "curl_easy_perform() failed: %s\n",
            curl_easy_strerror(outcome));
    return outcome;
}
Note:
I have had this same problem with the cURL timeout occurring.
The best recovery method I found is:
when a timeout occurs, retry the communication, requesting the same data