C threads corrupting each other - c
So I've got a weird issue that I don't quite understand why it is happening. In md4checker, I launch n pthreads that get and check an MD4 hash. In md4.c, I generate an MD4 hash. If I set n threads to 1, it works flawlessly. It generates the MD4 hash with perfect accuracy (I ran it in a loop for 1,000,000 tries and not a single time did it fail). However, when I run this same code with n threads as 2 (or higher) it fails a lot and randomly.
The md4.c file is derivative of another I found online but I tweaked it a little because the original md4.c had a memory leak (and running 50,000,000+ hashes made that leak fill up 16GB of RAM in about 15 minutes). If it was just a matter of it not working, I'd know where to start but I'm genuinely at a loss as to where and why multiple threads corrupt each other here.
edit: If I add usleep(100) to the worker thread in md4checker.c, it cuts the failure rate to 10% of what it normally does.
md4checker.c (works when running just one):
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/sysinfo.h>
#include "md4.c"
struct data{
char hash[33];
int done;
};
void *worker(void *ptr) {
int count=0;
char hash[33];
strcpy(hash, ((struct data *)ptr)->hash);
hash[32] ='\0';
char *md4;
int fails =0;
int runs =1000;
while(count < runs) {
md4 = MD4("cbff7", 5);
if(strcmp(md4, hash) != 0) {
++fails;
}
free(md4);
count++;
}
((struct data *)ptr)->done = 1;
printf("Done. Failed %d/%d times.\n", fails, runs);
}
void runprocs(int procs) {
printf("Running process on %d thread(s)\n", procs);
struct data d ={
.hash = "4e0d289576880188d4b968fe626bccef\0",
.done =0
};
pthread_t threads[procs];
void *ptr =&d;
for(int i=0; i<procs; ++i) {
int rc = pthread_create(&threads[i], NULL, worker, ptr);
}
while (!d.done) {
usleep(10000);
}
}
int main(int argc, char *argv[]) {
if (argc < 2) return -1;
runprocs(1);
runprocs(2);
runprocs(4);
}
After running this four times the output I got was:
Run one:
Running process on 1 thread(s)
Done. Failed 0/1000 times.
Running process on 2 thread(s)
Done. Failed 490/1000 times.
Done. Failed 489/1000 times.
Running process on 4 thread(s)
Done. Failed 941/1000 times.
Done. Failed 883/1000 times.
Done. Failed 847/1000 times.
Done. Failed 473/1000 times.
Run two:
Running process on 1 thread(s)
Done. Failed 0/1000 times.
Running process on 2 thread(s)
Done. Failed 19/1000 times.
Done. Failed 17/1000 times.
Running process on 4 thread(s)
Done. Failed 953/1000 times.
Done. Failed 891/1000 times.
Done. Failed 884/1000 times.
Done. Failed 850/1000 times.
Run three:
Running process on 1 thread(s)
Done. Failed 0/1000 times.
Running process on 2 thread(s)
Done. Failed 431/1000 times.
Done. Failed 371/1000 times.
Running process on 4 thread(s)
Done. Failed 931/1000 times.
Done. Failed 928/1000 times.
Done. Failed 720/1000 times.
Done. Failed 703/1000 times.
Run four:
Running process on 1 thread(s)
Done. Failed 0/1000 times.
Running process on 2 thread(s)
Done. Failed 82/1000 times.
Done. Failed 84/1000 times.
Running process on 4 thread(s)
Done. Failed 909/1000 times.
Done. Failed 928/1000 times.
Done. Failed 790/1000 times.
Done. Failed 808/1000 times.
The first line in each set is perfect (done from main thread). Then it runs it 1,000 times in two new threads and they both print the failed/run result (as you can see in the code above). So why the random number of fails? I'm very confused here, lol. Any help would be greatly appreciated.
md4.c:
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
char *MD4(char *, int); //this is the prototype you want to call. Everything else is internal.
static uint32_t *MD4Digest(uint32_t *w, int len);
static void setMD4Registers(uint32_t, uint32_t, uint32_t, uint32_t);
static uint32_t changeEndianness(uint32_t);
static void resetMD4Registers(void);
static uint32_t stringToUint32(char *);
static const char *BASE16 = "0123456789abcdef=";
#define F(X,Y,Z) (((X)&(Y))|((~(X))&(Z)))
#define G(X,Y,Z) (((X)&(Y))|((X)&(Z))|((Y)&(Z)))
#define H(X,Y,Z) ((X)^(Y)^(Z))
#define LEFTROTATE(A,N) ((A)<<(N))|((A)>>(32-(N)))
#define MD4ROUND1(a,b,c,d,x,s) a += F(b,c,d) + x; a = LEFTROTATE(a, s);
#define MD4ROUND2(a,b,c,d,x,s) a += G(b,c,d) + x + (uint32_t)0x5A827999; a = LEFTROTATE(a, s);
#define MD4ROUND3(a,b,c,d,x,s) a += H(b,c,d) + x + (uint32_t)0x6ED9EBA1; a = LEFTROTATE(a, s);
static uint32_t A = 0x67452301;
static uint32_t B = 0xefcdab89;
static uint32_t C = 0x98badcfe;
static uint32_t D = 0x10325476;
void Concat(char **out, int olen, char* second, int slen) {
if(*out == NULL ) {
*out = malloc(1);
*out[1] = '\0';
}
char *old = *out; // Grab the original string.
//int len = (sizeof(char)*((strlen(old)+strlen(second)+1))); // Get the length of the combined strings plus 1 for \0
*out = malloc(olen+slen+1); // Create the new char array to hold the combined strings.
memset(*out, 0, olen+slen+1); // Set all bits to zero in new array.
char *p = *out; // We'll use p to track position for writing the values.
//strcpy(p, old); // Copy the original string to p;
memcpy(p, old, olen);
p += olen; // Move p forward by the length of old.
//strcpy(p, second); // Copy the second string to p
memcpy(p, second, slen);
free(old); // Free old to prevent memory leak.
free(second);
}
int Expand(char **out, int amt) {
int len = strlen(*out)+amt; // Get the length of the array + expand amount \0
char *new; // Create a new pointer.
new = malloc(sizeof(char)*len); // Create the new char array
memset(new, 0, sizeof(char)*len); // Set all bits to zero in new array.
strcpy(new, *out); // Copy the original string to new array;
free(*out); // Free the original memory to prevent leak
*out = new;
return len; // Return the new memory size
}
char *base16Encode(char *in, int len){
char *out = malloc(len*2);
int i,j;
j=0;
for(i=0; i<len; i++){
out[j++]=BASE16[((in[i] & 0xF0)>>4)];
out[j++]=BASE16[(in[i] & 0x0F)];
}
out[j]='\0';
free(in);
return out;
}
uint32_t stringToUint32(char *c){
uint32_t l;
int i;
l=0;
for(i=0; i<4; i++){
l = l|(((uint32_t)((unsigned char)c[i]))<<(8*(3-i)));
}
return l;
}
char *uint32ToString(uint32_t l){
char *c = malloc(sizeof(uint32_t)+1);
memset(c, 0, sizeof(uint32_t)+1);
int i;
for(i=0; i<4; i++){
c[i] = (l >> (8*(3-i))) & 0xFF;
}
return c;
}
char *MD4(char *str, int temporaryvar){
uint64_t mlen=strlen(str); // Get the length of str + 1 for \0
uint64_t slen=mlen;
char *m = malloc(mlen+1); // Create a pointer to manipulate data and give it an array of size mlen
strcpy(m, str); // Copy str to m
m[mlen] = '\0'; // Set the last value to 0.
unsigned char *oneByte = malloc(sizeof(char));
oneByte[0] = 0x80;
Concat(&m, mlen, oneByte, 1); // Add the 1 byte.
int i, wlen;
mlen=strlen(m);
i=((56-mlen)%64);
if(i<0) i+=64;
mlen = Expand(&m, i);
uint32_t *w = malloc(sizeof(uint32_t)*(mlen/4+2));
for(i=0; i<mlen/4; i++){
w[i]=stringToUint32(m+(4*i));
}
w[i++] = (slen<<3) & 0xFFFFFFFF;
w[i++] = (slen>>29) & 0xFFFFFFFF;
wlen=i;
for(i=0; i<wlen-2; ++i){
w[i]=changeEndianness(w[i]);
}
uint32_t *hash = MD4Digest(w,wlen);
char *digest = malloc(1);
memset(digest, 0, 1);
//digest=newString(NULL,0);
for(i=0; i<4; i++){
hash[i]=changeEndianness(hash[i]);
Concat(&digest, sizeof(uint32_t)*i,uint32ToString(hash[i]), sizeof(uint32_t));
}
// Don't forget to free up your memory.
free(m);
free(w);
free(hash);
return base16Encode(digest, sizeof(uint32_t)*4);
}
uint32_t *MD4Digest(uint32_t *w, int len){
//assumes message.len is a multiple of 64 bytes.
int i,j;
uint32_t X[16];
uint32_t *digest = malloc(sizeof(uint32_t)*4);
uint32_t AA, BB, CC, DD;
for(i=0; i<len/16; i++){
for(j=0; j<16; j++){
X[j]=w[i*16+j];
}
AA=A;
BB=B;
CC=C;
DD=D;
MD4ROUND1(A,B,C,D,X[0],3);
MD4ROUND1(D,A,B,C,X[1],7);
MD4ROUND1(C,D,A,B,X[2],11);
MD4ROUND1(B,C,D,A,X[3],19);
MD4ROUND1(A,B,C,D,X[4],3);
MD4ROUND1(D,A,B,C,X[5],7);
MD4ROUND1(C,D,A,B,X[6],11);
MD4ROUND1(B,C,D,A,X[7],19);
MD4ROUND1(A,B,C,D,X[8],3);
MD4ROUND1(D,A,B,C,X[9],7);
MD4ROUND1(C,D,A,B,X[10],11);
MD4ROUND1(B,C,D,A,X[11],19);
MD4ROUND1(A,B,C,D,X[12],3);
MD4ROUND1(D,A,B,C,X[13],7);
MD4ROUND1(C,D,A,B,X[14],11);
MD4ROUND1(B,C,D,A,X[15],19);
MD4ROUND2(A,B,C,D,X[0],3);
MD4ROUND2(D,A,B,C,X[4],5);
MD4ROUND2(C,D,A,B,X[8],9);
MD4ROUND2(B,C,D,A,X[12],13);
MD4ROUND2(A,B,C,D,X[1],3);
MD4ROUND2(D,A,B,C,X[5],5);
MD4ROUND2(C,D,A,B,X[9],9);
MD4ROUND2(B,C,D,A,X[13],13);
MD4ROUND2(A,B,C,D,X[2],3);
MD4ROUND2(D,A,B,C,X[6],5);
MD4ROUND2(C,D,A,B,X[10],9);
MD4ROUND2(B,C,D,A,X[14],13);
MD4ROUND2(A,B,C,D,X[3],3);
MD4ROUND2(D,A,B,C,X[7],5);
MD4ROUND2(C,D,A,B,X[11],9);
MD4ROUND2(B,C,D,A,X[15],13);
MD4ROUND3(A,B,C,D,X[0],3);
MD4ROUND3(D,A,B,C,X[8],9);
MD4ROUND3(C,D,A,B,X[4],11);
MD4ROUND3(B,C,D,A,X[12],15);
MD4ROUND3(A,B,C,D,X[2],3);
MD4ROUND3(D,A,B,C,X[10],9);
MD4ROUND3(C,D,A,B,X[6],11);
MD4ROUND3(B,C,D,A,X[14],15);
MD4ROUND3(A,B,C,D,X[1],3);
MD4ROUND3(D,A,B,C,X[9],9);
MD4ROUND3(C,D,A,B,X[5],11);
MD4ROUND3(B,C,D,A,X[13],15);
MD4ROUND3(A,B,C,D,X[3],3);
MD4ROUND3(D,A,B,C,X[11],9);
MD4ROUND3(C,D,A,B,X[7],11);
MD4ROUND3(B,C,D,A,X[15],15);
A+=AA;
B+=BB;
C+=CC;
D+=DD;
}
digest[0]=A;
digest[1]=B;
digest[2]=C;
digest[3]=D;
resetMD4Registers();
return digest;
}
uint32_t changeEndianness(uint32_t x){
return ((x & 0xFF) << 24) | ((x & 0xFF00) << 8) | ((x & 0xFF0000) >> 8) | ((x & 0xFF000000) >> 24);
}
void setMD4Registers(uint32_t AA, uint32_t BB, uint32_t CC, uint32_t DD){
A=AA;
B=BB;
C=CC;
D=DD;
}
void resetMD4Registers(void){
setMD4Registers(0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476);
}
So why the random number of fails?
The MD4 code presented is not thread safe, and you are adding a bit of thread-unsafety of your own.
Observe in particular variables A, B, C, and D in file md4.c. These are declared at file scope and without the _Thread_local qualifier, so they have static storage duration and are shared by all threads in the process. These are modified during the computation, so you have data races involving all of these. The resulting behavior is undefined, and it shouldn't be hard to imagine how it might mess things up if multiple threads were clobbering the values that each other had written in those variables.
As for your own code, with each call to runprocs(), the main thread and each new one created all share the same struct data object, which the threads read and modify and the main thread reads, all without synchronization. This also causes undefined behavior, though it looks like this could be rescued by engaging a mutex or other synchronization mechanism.
Additionally, the MD4 code appears to be deterministic -- given the same input, it will always (if run single-threaded to avoid undefined behavior) produce the same output. It is therefore unclear what you seek to accomplish by running it in multiple threads on the same input.
Also, the while(!d.done) loop is pointless and poor form. You should be joining each thread via pthread_join() to clean up its resources after it, and since that has the (primary) effect of waiting for the thread to terminate, you don't need to also roll your own wait for termination.
Related
DES CBC mode not outputting correctly
I am working on a project in C to implement CBC mode on top of a skeleton code for DES with OpenSSL. We are not allowed to use a function that does the CBC mode automatically, in the sense that we must implement it ourselves. I am getting output but I have result files and my output is not matching up completely with the intended results. I also am stuck on figuring out how to pad the file to ensure all the blocks are of equal size, which is probably one of the reasons why I'm not receiving the correct output. Any help would be appreciated. Here's my modification of the skeleton code so far: #include <stdio.h> #include <stdlib.h> #include <string.h> #include <openssl/des.h> #include <sys/time.h> #include <unistd.h> #define ENC 1 #define DEC 0 DES_key_schedule key; int append(char*s, size_t size, char c) { if(strlen(s) + 1 >= size) { return 1; } int len = strlen(s); s[len] = c; s[len+1] = '\0'; return 0; } int getSize (char * s) { char * t; for (t = s; *t != '\0'; t++) ; return t - s; } void strToHex(const_DES_cblock input, unsigned char *output) { int arSize = 8; unsigned int byte; for(int i=0; i<arSize; i++) { if(sscanf(input, "%2x", &byte) != 1) { break; } output[i] = byte; input += 2; } } void doBitwiseXor(DES_LONG *xorValue, DES_LONG* data, const_DES_cblock roundOutput) { DES_LONG temp[2]; memcpy(temp, roundOutput, 8*sizeof(unsigned char)); for(int i=0; i<2; i++) { xorValue[i] = temp[i] ^ data[i]; } } void doCBCenc(DES_LONG *data, const_DES_cblock roundOutput, FILE *outFile) { DES_LONG in[2]; doBitwiseXor(in, data, roundOutput); DES_encrypt1(in,&key,ENC); printf("ENCRYPTED\n"); printvalueOfDES_LONG(in); printf("%s","\n"); fwrite(in, 8, 1, outFile); memcpy(roundOutput, in, 2*sizeof(DES_LONG)); } int main(int argc, char** argv) { const_DES_cblock cbc_key = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef}; const_DES_cblock IV = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef}; // Initialize the timing function struct timeval start, end; gettimeofday(&start, NULL); int l; if ((l = DES_set_key_checked(&cbc_key,&key)) != 0) printf("\nkey error\n"); FILE *inpFile; FILE *outFile; inpFile = fopen("test.txt", "r"); outFile = fopen("test_results.txt", "wb"); if(inpFile && outFile) { unsigned char ch; // A char array that will hold all 8 ch values. // each ch value is appended to this. unsigned char eight_bits[8]; // counter for the loop that ensures that only 8 chars are done at a time. int count = 0; while(!feof(inpFile)) { // read in a character ch = fgetc(inpFile); // print the character printf("%c",ch); // append the character to eight_bits append(eight_bits,1,ch); // increment the count so that we only go to 8. count++; const_DES_cblock roundOutput; // When count gets to 8 if(count == 8) { // for formatting printf("%s","\n"); // Encrypt the eight characters and store them back in the char array. //DES_encrypt1(eight_bits,&key,ENC); doCBCenc(eight_bits, roundOutput, outFile); // prints out the encrypted string int k; for(k = 0; k < getSize(eight_bits); k++){ printf("%c", eight_bits[k]); } // Sets count back to 0 so that we can do another 8 characters. count = 0; // so we just do the first 8. When everything works REMOVE THE BREAK. //break; } } } else { printf("Error in opening file\n"); } fclose(inpFile); fclose(outFile); // End the timing gettimeofday(&end, NULL); // Initialize seconds and micros to hold values for the time output long seconds = (end.tv_sec - start.tv_sec); long micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); // Output the time printf("The elapsed time is %d seconds and %d microseconds\n", seconds, micros); }
Your crypto is at least half correct, but you have a lot of actual or potential other errors. As you identified, raw CBC mode can only encrypt data which is a multiple of the block size, for DES 64 bits or 8 bytes (on most modern computers and all where you could use OpenSSL). In some applications this is okay; for example if the data is (always) an MD5 or SHA-256 or SHA-512 hash, or a GUID, or an IPv6 (binary) address, then it is a block multiple. But most applications want to handle at least any length in bytes, so they need to use some scheme to pad on encrypt and unpad on decrypt the last block (all blocks before the last already have the correct size). Many different schemes have been developed for this, so you need to know which to use. I assume this is a school assignment (since no real customer would set such a stupid and wasteful combination of requirements) and this should either have been specified or clearly left as a choice. One padding scheme very common today (although not for single-DES, because that is broken, unsafe, obsolete, and not common) is the one defined by PKCS5 and generalized by PKCS7 and variously called PKCS5, PKCS7, or PKCS5/7 padding, so I used that as an example. Other than that: you try to test feof(inpFile) before doing fgetc(inpFile). This doesn't work in C. It results in your code treating the low 8 bits of EOF (255 aka 0xFF on practically all implementations) as a valid data character added to the characters that were actually in the file. The common idiom is to store the return of getchar/getc/fgetc in a signed int and compare to EOF, but that would have required more changes so I used an alternate. you don't initialize eight_bits which is a local-scope automatic duration variable, so its contents are undefined and depending on the implementation are often garbage, which means trying to 'append' to it by using strlen() to look for the end won't work right and might even crash. Although on some implementations at least some times it might happen to contain zero bytes, and 'work'. In addition it is possible in C for a byte read from a file (and stored here) to be \0 which will also make this work wrong, although if this file contains text, as its name suggests, it probably doesn't contain any \0 bytes. once you fill eight_bits you write 'off-the-end' into element [8] which doesn't exist. Technically this is Undefined Behavior and anything at all can happen, traditionally expressed on Usenet as nasal demons. Plus after main finishes the first block it doesn't change anything in eight_bits so all further calls to append find it full and discard the new character. while you could fix the above points separately, a much simple solution is available: you are already using count to count the number of bytes in the current block, so just use it as the subscript. roundOutput is also an uninitialized local/auto variable within the loop, which is then used as the previous block for the CBC step, possibly with garbage or wrong value(s). And you don't use the IV at all, as is needed. You should allocate this before the loop (so it retains its value through all iterations) and initialize it to the IV, and then for each block in the loop your doCBCenc can properly XOR it to the new block and then leave the encrypted new block to be used next time. your code labelled 'prints out the encrypted string' prints plaintext not ciphertext -- which is binary and shouldn't be printed directly anyway -- and is not needed because your file-read loop already echoes each character read. But if you do want to print a (validly null-terminated) string it's easier to just use fputs(s) or [f]printf([f,]"%s",s) or even fwrite(s,1,strlen(s),f). your doCBCenc has a reference to printvalueofDES_LONG which isn't defined anywhere, and which along with two surrounding printf is clearly not needed. you should use a cast to convert the first argument to doCBCenc -- this isn't strictly required but is good style and a good compiler (like mine) complains if you don't finally, when an error occurs you usually print a message but then continue running, which will never work right and may produce symptoms that disguise the problem and make it hard to fix. The below code fixes the above except that last (which would have been more work for less benefit) plus I removed routines that are now superfluous, and the timing code which is just silly: Unix already has builtin tools to measure and display process time more easily and reliably than writing code. Code I 'removed' is under #if 0 for reference, and code I added under #else or #if 1 except for the cast. The logic for PKCS5/7 padding is under #if MAYBE so it can be either selected or not. Some consider it better style to use sizeof(DES_block) or define a macro instead of the magic 8's, but I didn't bother -- especially since it would have required changes that aren't really necessary. // SO70209636 #include <stdio.h> #include <stdlib.h> #include <string.h> #include <openssl/des.h> #include <sys/time.h> #include <unistd.h> #define ENC 1 #define DEC 0 DES_key_schedule key; #if 0 int append(char*s, size_t size, char c) { if(strlen(s) + 1 >= size) { return 1; } int len = strlen(s); s[len] = c; s[len+1] = '\0'; return 0; } int getSize (char * s) { char * t; for (t = s; *t != '\0'; t++) ; return t - s; } void strToHex(const_DES_cblock input, unsigned char *output) { int arSize = 8; unsigned int byte; for(int i=0; i<arSize; i++) { if(sscanf(input, "%2x", &byte) != 1) { break; } output[i] = byte; input += 2; } } #endif void doBitwiseXor(DES_LONG *xorValue, DES_LONG* data, const_DES_cblock roundOutput) { DES_LONG temp[2]; memcpy(temp, roundOutput, 8*sizeof(unsigned char)); for(int i=0; i<2; i++) { xorValue[i] = temp[i] ^ data[i]; } } void doCBCenc(DES_LONG *data, const_DES_cblock roundOutput, FILE *outFile) { DES_LONG in[2]; doBitwiseXor(in, data, roundOutput); DES_encrypt1(in,&key,ENC); #if 0 printf("ENCRYPTED\n"); printvalueOfDES_LONG(in); printf("%s","\n"); #endif fwrite(in, 8, 1, outFile); memcpy(roundOutput, in, 2*sizeof(DES_LONG)); } int main(int argc, char** argv) { const_DES_cblock cbc_key = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef}; const_DES_cblock IV = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef}; #if 0 // Initialize the timing function struct timeval start, end; gettimeofday(&start, NULL); #endif int l; if ((l = DES_set_key_checked(&cbc_key,&key)) != 0) printf("\nkey error\n"); #if 1 DES_cblock roundOutput; // must be outside the loop memcpy (roundOutput, IV, 8); // and initialized #endif FILE *inpFile; FILE *outFile; inpFile = fopen("test.txt", "r"); outFile = fopen("test.encrypt", "wb"); if(inpFile && outFile) { unsigned char ch; // A char array that will hold all 8 ch values. // each ch value is appended to this. unsigned char eight_bits[8]; // counter for the loop that ensures that only 8 chars are done at a time. int count = 0; #if 0 while(!feof(inpFile)) { // read in a character ch = fgetc(inpFile); #else while( ch = fgetc(inpFile), !feof(inpFile) ){ #endif // print the character printf("%c",ch); #if 0 // append the character to eight_bits append(eight_bits,1,ch); // increment the count so that we only go to 8. count++; #else eight_bits[count++] = ch; #endif #if 0 const_DES_cblock roundOutput; #endif // When count gets to 8 if(count == 8) { // for formatting printf("%s","\n"); // Encrypt the eight characters and store them back in the char array. //DES_encrypt1(eight_bits,&key,ENC); doCBCenc((DES_LONG*)eight_bits, roundOutput, outFile); #if 0 // prints out the encrypted string int k; for(k = 0; k < getSize(eight_bits); k++){ printf("%c", eight_bits[k]); } #endif // Sets count back to 0 so that we can do another 8 characters. count = 0; // so we just do the first 8. When everything works REMOVE THE BREAK. //break; } } #if MAYBE memset (eight_bits+count, 8-count, 8-count); // PKCS5/7 padding doCBCenc((DES_LONG*)eight_bits, roundOutput, outFile); #endif } else { printf("Error in opening file\n"); } fclose(inpFile); fclose(outFile); #if 0 // End the timing gettimeofday(&end, NULL); // Initialize seconds and micros to hold values for the time output long seconds = (end.tv_sec - start.tv_sec); long micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); // Output the time printf("The elapsed time is %d seconds and %d microseconds\n", seconds, micros); #endif } PS: personally I wouldn't put the fwrite in doCBCenc; I would only do the encryption and let the caller do whatever I/O is appropriate which might in some cases not be fwrite. But what you have is not wrong for the requirements you apparently have.
Multithreaded reading/doing things with chars from character array in C
I am trying to read a character array that contains the contents of many large files. The character array is going to be quite large, because the files are large, so I want to do it using multithreading (pthread). I want the user to be able to designate how many threads they want to run. I have something working, but increasing the number of threads does nothing to affect performance (i.e. 1 thread finishes just as fast as 10). In fact, it seems to be just the opposite: telling the program to use 10 threads runs much slower than telling it to use 1. Here is the method for slicing up the character array according to the number of threads the user passes to the program. I know this is wrong, I could use some advice here. //Universal variables int numThreads; size_t sizeOfAllFiles; // Size, in bytes, of allFiles char* allFiles; // Where all of the files are stored, together void *zip(void *nthread); void *zip(void *nThread) { int currentThread = *(int*)nThread; int remainder = sizeOfAllFiles % currentThread; int slice = (sizeOfAllFiles-remainder) / currentThread; // I subtracted the remainder for my testing // because I didn't want to worry about whether // the char array's size is evenly divisible by numThreads int i = (slice * (currentThread-1)); char currentChar = allFiles[i]; //Used for iterating while(i<(slice * currentThread) && i>=(slice * (currentThread-1))) { i++; // Do things with the respective thread's // 'slice' of the array. ..... } return 0; } And here is how I am spawning the threads, which I am almost positive that I am doing correctly: for (int j = 1; j <= threadNum; j++) { k = malloc(sizeof(int)); *k = j; if (pthread_create (&thread[j], NULL, zip, k) != 0) { printf("Error\n"); free(thread); exit(EXIT_FAILURE); } } for (int i = 1; i <= threadNum; i++) pthread_join (thread[i], NULL); This is all really confusing for me so if I could get some help on this, I'd greatly appreciate it. I specifically am struggling with the slicing part (cutting it up correctly), and with not seeing performance gains by using more than one thread. Thanks in advance.
I'm starting by throwing a test program at you: #include <assert.h> #include <stdbool.h> #include <stdlib.h> #include <stdio.h> #include <stddef.h> #include <time.h> bool EnlargeBuffer(char ** const buffer_pointer, size_t * const buffer_size) { char * larger_buffer = realloc(*buffer_pointer, 2 * *buffer_size); if (! larger_buffer) { larger_buffer = realloc(*buffer_pointer, *buffer_size + 100); if (! larger_buffer) { return false; } *buffer_size += 100; } else { *buffer_size *= 2; } *buffer_pointer = larger_buffer; printf("(Buffer size now at %zu)\n", *buffer_size); return true; } bool ReadAll(FILE * const source, char ** pbuffer, size_t * pbuffer_size, size_t * pwrite_index) { int c; while ((c = fgetc(source)) != EOF) { assert(*pwrite_index < *pbuffer_size); (*pbuffer)[(*pwrite_index)++] = c; if (*pwrite_index == *pbuffer_size) { if (! EnlargeBuffer(pbuffer, pbuffer_size)) { free(*pbuffer); return false; } } } if (ferror(source)) { free(*pbuffer); return false; } return true; } unsigned CountAs(char const * const buffer, size_t size) { unsigned count = 0; while (size--) { if (buffer[size] == 'A') ++count; } return count; } int main(int argc, char ** argv) { char * buffer = malloc(100); if (! buffer) return 1; size_t buffer_size = 100; size_t write_index = 0; clock_t begin = clock(); for (int i = 1; i < argc; ++i) { printf("Reading %s now ... \n", argv[i]); FILE * const file = fopen(argv[i], "r"); if (! file) return 1; if (! ReadAll(file, &buffer, &buffer_size, &write_index)) { return 1; } fclose(file); } clock_t end = clock(); printf("Reading done, took %f seconds\n", (double)(end - begin) / CLOCKS_PER_SEC); begin = clock(); unsigned const as = CountAs(buffer, write_index); end = clock(); printf("All files have %u 'A's, counting took %f seconds\n", as, (double)(end - begin) / CLOCKS_PER_SEC); } This program reads all files (passed as command line arguments) into one big large char * buffer, and then counts all bytes which are == 'A'. It also times both of these steps. Example run with (shortened) output on my system: # gcc -Wall -Wextra -std=c11 -pedantic allthefiles.c # dd if=/dev/zero of=large_file bs=1M count=1000 # ./a.out allthefiles.c large_file Reading allthefiles.c now ... (Buffer size now at 200) ... (Buffer size now at 3200) Reading large_file now ... (Buffer size now at 6400) (Buffer size now at 12800) ... (Buffer size now at 1677721600) Reading done, took 4.828559 seconds All files have 7 'A's, counting took 0.764503 seconds Reading took almost 5 seconds, but counting (= iterating once, in a single thread, over all bytes) took a bit less than 1 second. You're optimizing at the wrong place! Using 1 thread to read all files, and then using N threads to operate on that one buffer isn't going to bring you places. The fastest way to read 1 file is to use 1 thread. For multiple files, use 1 thread per file! So, in order to achieve the speedup that you need to show for your assignment: Create a pool of threads with variable size. Have a pool of tasks, where each task consists of read one file compute it's run-length encoding store the run-length encoded file let the threads take tasks from your task pool. Things to consider: How do you combine the results of each task? Without requiring (costly) synchronization.
Changing parts of arrays/structs/.. in threads without blocking the whole thing, in pure c
I want to modify some (not all) fields of an array (or structs) in multiple threads, with out blocking the rest of the array as the rest of it is being modified in other threads. How is this achieved? I found some answers, but they are for C++ and I want to do it in C. Here is the code I got so far: #define _GNU_SOURCE #include <pthread.h> #include <stdio.h> #include <semaphore.h> #include <stdlib.h> #include <time.h> #include <unistd.h> #define ARRAYLENGTH 5 #define TARGET 10000 int target; typedef struct zstr{ int* array; int place; int run; pthread_mutex_t* locks; }zstr; void *countup(void *); int main(int argc, char** args){ int al; if(argc>2){ al=atoi(args[1]); target=atoi(args[2]); }else{ al=ARRAYLENGTH; target=TARGET; } printf("%d %d\n", al, target); zstr* t=malloc(sizeof(zstr)); t->array=calloc(al, sizeof(int)); t->locks=calloc(al, sizeof(pthread_mutex_t)); int* rua=calloc(al, sizeof(int)); pthread_t id[4*al]; for(int i=0; i<al; i++) pthread_mutex_init(&(t->locks[i]), NULL); for(int j=0; j<4*al; j++){ int st=j%al; t->run=rua[st]++; t->place=st; pthread_create(&id[j], NULL, &countup, t); } for(int k=0; k<4*al; k++){ pthread_join(id[k], NULL); } for(int u=0; u<al; u++) printf("%d\n", t->array[u]); free(rua); free(t->locks); free(t->array); return 0; } void *countup(void* table){ zstr* nu=table; if(!nu->run){ pthread_mutex_lock(nu->locks + nu->place); }else{ pthread_mutex_trylock(nu->locks + nu->place); } while(nu->array[nu->place]<target) nu->array[nu->place]++; pthread_mutex_unlock(nu->locks + nu->place); return NULL; } Sometimes this works just fine, but then calculates wrong values and for quiet sort problems (like the default values), it takes super long (strangely it worked once when I handed them in as parameters).
There isn't anything special about part of an array or structure. What matters is that the mutex or other synchronization you apply to a given value is used correctly. In this case, it seems like you're not checking your locking function results. The design of the countup function only allows a single thread to ever access the object, running the value all the way up to target before releasing the lock, but you don't check the trylock result. So what's probably happening is the first thread gets the lock, and subsequent threads on the same mutex call trylock and fail to get the lock, but the code doesn't check the result. Then you get multiple threads incrementing the same value without synchronization. Given all the pointer dereferences the index and increment operations are not guaranteed to be atomic, leading to problems where the values grow well beyond target. The moral of the story is to check function results and handle errors.
Sorry, don't have enough reputation to comment, yet. Adding to Brad's comment of not checking the result of pthread_mutex_trylock, there's a misconception that shows many times with Pthreads: You assume, that pthread_create will start immediately, and receive the values passed (here pointer t to your struct) and it's content read atomically. That is not true. The thread might start any time later and will find the contents, like t->run and t->place already changed by the next iteration of the j-loop in main. Moreover, you might want to read David Butenhof's book "Programming with Posix Threads" (old, but still a good reference) and check on synchronization and condition variables. It's not that good style to start that many threads in the first place ;) As this has come up a few times and might come up again, I have restructured that a bit to issue work_items to the started threads. The code below might be amended by a function, that maps the index into array to always the same area_lock, or by adding a queue to feed the running threads with further work-item... #include <assert.h> #include <stdio.h> #include <stdlib.h> #include <time.h> #include <unistd.h> #include <pthread.h> /* * Macros for default values. To make it more interesting, set: * ARRAYLENGTH != THREADS * INCREMENTS != TARGET * NUM_AREAS != THREADS * Please note, that NUM_AREAS must be <= ARRAY_LENGTH. */ #define ARRAYLENGTH 10 #define TARGET 100 #define INCREMENTS 10 #define NUM_AREAS 2 #define THREADS 5 /* These variables are initialized once in main, then only read... */ int array_len; int target; int num_areas; int threads; int increments; /** * A long array that is going to be equally split into number of areas. * Each area is covered by a lock. The number of areas do not have to * equal the length of the array, but must be smaller... */ typedef struct shared_array { int * array; int num_areas; pthread_mutex_t * area_locks; } shared_array; /** * A work-item a thread is assigned to upon startup (or later on). * Then a value of { 0, any } might signal the ending of this thread. * The thread is working on index within zstr->array, counting up increments * (or up until the target is reached). */ typedef struct work_item { shared_array * zstr; int work_on_index; int increments; } work_item; /* Local function declarations */ void * countup(void *); int main(int argc, char * argv[]) { int i; shared_array * zstr; if (argc == 1) { array_len = ARRAYLENGTH; target = TARGET; num_areas = NUM_AREAS; threads = THREADS; increments = INCREMENTS; } else if (argc == 6) { array_len = atoi(argv[1]); target = atoi(argv[2]); num_areas = atoi(argv[3]); threads = atoi(argv[4]); increments = atoi(argv[5]); } else { fprintf(stderr, "USAGE: %s len target areas threads increments", argv[0]); exit(-1); } assert(array_len >= num_areas); zstr = malloc(sizeof (shared_array)); zstr->array = calloc(array_len, sizeof (int)); zstr->num_areas = num_areas; zstr->area_locks = calloc(num_areas, sizeof (pthread_mutex_t)); for (i = 0; i < num_areas; i++) pthread_mutex_init(&(zstr->area_locks[i]), NULL); pthread_t * id = calloc(threads, sizeof (pthread_t)); work_item * work_items = calloc(threads, sizeof (work_item)); for (i = 0; i < threads; i++) { work_items[i].zstr = zstr; work_items[i].work_on_index = i % array_len; work_items[i].increments = increments; pthread_create(&(id[i]), NULL, &countup, &(work_items[i])); } // Let's just do this one work-item. for (i = 0; i < threads; i++) { pthread_join(id[i], NULL); } printf("Array: "); for (i = 0; i < array_len; i++) printf("%d ", zstr->array[i]); printf("\n"); free(id); free(work_items); free(zstr->area_locks); free(zstr->array); return 0; } void *countup(void* first_work_item) { work_item * wi = first_work_item; int inc; // Extract the information from this work-item. int idx = wi->work_on_index; int area = idx % wi->zstr->num_areas; pthread_mutex_t * lock = &(wi->zstr->area_locks[area]); pthread_mutex_lock(lock); for (inc = wi->increments; inc > 0 && wi->zstr->array[idx] < target; inc--) wi->zstr->array[idx]++; pthread_mutex_unlock(lock); return NULL; }
Multiple producer single consumer with Circular Buffer
Need help in getting the following to work. I have a multiple producer threads (each writing say 100 bytes of data) to ringbuffer. And one single reader(consumer) thread ,reads 100 bytes at a time and writes to stdout.(Finally i want to write to files based on the data) With this implementation ,I get the data read from ring buffer wrong sometimes. see below Since the ringbuffer size is small it becomes full and some part of data is loss.This is not my current problem. ** Questions: On printing the data thats read from ringbuffer ,some data gets interchanged !!I'm unable to find the bug. Is the logic/approach correct ? (or) Is there a better way to do this ringbuffer.h #define RING_BUFFER_SIZE 500 struct ringbuffer { char *buffer; int wr_pointer; int rd_pointer; int size; int fill_count; }; ringbuffer.c #include <stdio.h> #include <stdlib.h> #include <string.h> #include "ringbuffer.h" int init_ringbuffer(char *rbuffer, struct ringbuffer *rb, size_t size) { rb->buffer = rbuffer; rb->size = size; rb->rd_pointer = 0; rb->wr_pointer = 0; rb->fill_count = 0; return 0; } int rb_get_free_space (struct ringbuffer *rb) { return (rb->size - rb->fill_count); } int rb_write (struct ringbuffer *rb, unsigned char * buf, int len) { int availableSpace; int i; availableSpace = rb_get_free_space(rb); printf("In Write AVAIL SPC=%d\n",availableSpace); /* Check if Ring Buffer is FULL */ if(len > availableSpace) { printf("NO SPACE TO WRITE - RETURN\n"); return -1; } i = rb->wr_pointer; if(i == rb->size) //At the end of Buffer { i = 0; } else if (i + len > rb->size) { memcpy(rb->buffer + i, buf, rb->size - i); buf += rb->size - i; len = len - (rb->size - i); rb->fill_count += len; i = 0; } memcpy(rb->buffer + i, buf, len); rb->wr_pointer = i + len; rb->fill_count += len; printf("w...rb->write=%tx\n", rb->wr_pointer ); printf("w...rb->read=%tx\n", rb->rd_pointer ); printf("w...rb->fill_count=%d\n", rb->fill_count ); return 0; } int rb_read (struct ringbuffer *rb, unsigned char * buf, int max) { int i; printf("In Read,Current DATA size in RB=%d\n",rb->fill_count); /* Check if Ring Buffer is EMPTY */ if(max > rb->fill_count) { printf("In Read, RB EMPTY - RETURN\n"); return -1; } i = rb->rd_pointer; if (i == rb->size) { i = 0; } else if(i + max > rb->size) { memcpy(buf, rb->buffer + i, rb->size - i); buf += rb->size - i; max = max - (rb->size - i); rb->fill_count -= max; i = 0; } memcpy(buf, rb->buffer + i, max); rb->rd_pointer = i + max; rb->fill_count -= max; printf("r...rb->write=%tx\n", rb->wr_pointer ); printf("r...rb->read=%tx\n", rb->rd_pointer ); printf("DATA READ ---> %s\n",(char *)buf); printf("r...rb->fill_count=%d\n", rb->fill_count ); return 0; }
At the producer you also need to wait on conditional variable for the has empty space condition. The both conditional variables should be signaled unconditionally, i.e. when a consumer removes an element from the ring buffer it should signal the producers; when a producer put something in the buffer it should signal the consumers. Also, I would move this waiting/signaling logic into rb_read and rb_write implementations, so your ring buffer is a 'complete to use solution' for the rest of your program.
As to your questions -- 1. I can't find that bug either -- in fact, I've tried your code and don't see that behavior. 2. You ask if this is logic/approach correct -- well, as far as it goes, this does implement a kind of ring buffer. Your test case happens to have an integer multiple of the size, and the record size is constant, so that's not the best test. In trying your code, I found that there is a lot of thread starvation -- the 1st producer thread to run (the last created) hits things really hard, trying and failing after the 1st 5 times to stuff things into the buffer, not giving the consumer thread a chance to run (or even start). Then, when the consumer thread starts, it stays cranking for quite some time before it releases the cpu, and the next producer thread finally starts. That's how it works on my machine -- it will be different on different machines, I'm sure. It's too bad that your current code doesn't have a way to end -- creating files of 10's or 100's of MB ... hard to wade through.
(Probably a bit later for the author, but if anyone else searches for a "multiple producers single consumer") I think the fundamental problem in that implementation is what rb_write modifies a global state (rb->fill_count and other rb->XX) w/o doing any synchronization between multiple writers. For alternative ideas check the: http://www.linuxjournal.com/content/lock-free-multi-producer-multi-consumer-queue-ring-buffer.
one consumer multiple producer in c prevent racing when resuming after full buffer
I made a circular buffer with multiple clients writing a message of different length into a buffer. The server reads them out. It based the code an the consumer/producer problem. The problem is when the buffer is full and the server removes all the data from the buffer the client is signaled to resume it's writing operations but instead another client (in another thread) start writing it message in the buffer. I want client that was already writing before the buffer was full to resume it's operations so that the message doesn't arrive out of order. This is my code (i removed a lot of test code) #include <stdio.h> #include <malloc.h> #include <string.h> #include <pthread.h> #include <unistd.h> #define BUFFER_SIZE 8 #define NUM_THREADS 4 struct cBuf{ char *buf; int size; int start; int end; pthread_mutex_t mutex; pthread_cond_t buffer_full; pthread_cond_t buffer_empty; }; struct cBuf cb; void buf_Init(struct cBuf *cb, int size) { int i; cb->size = size + 1; cb->start = 0; cb->end = 0; cb->buf = (char *)calloc(cb->size, sizeof(char)); for (i=0;i<size;i++) cb->buf[i]='_'; } void buf_Free(struct cBuf *cb) { free(cb->buf); } int buf_IsFull(struct cBuf *cb) { return (cb->end + 1) % cb->size == cb->start; } int buf_IsEmpty(struct cBuf *cb) { return cb->end == cb->start; } int buf_Insert(struct cBuf *cb, char *elem) { int i,j; pthread_mutex_lock(&(cb->mutex)); for (i=0; i < strlen(elem); ++ i){ if (buf_IsFull(cb)==1) printf("\nProducer (buf_Insert) is waiting because of full buffer"); while(buf_IsFull(cb)){ pthread_cond_signal(&(cb->buffer_full)); pthread_cond_wait(&(cb->buffer_empty),&(cb->mutex)); } cb->buf[cb->end] = elem[i]; cb->end = (cb->end + 1) % cb->size; printf("%c [INPUT]",elem[i]); } pthread_cond_signal(&(cb->buffer_full)); pthread_mutex_unlock(&(cb->mutex)); return 0; } int buf_Read(struct cBuf *cb, char *out) { int i,j; pthread_mutex_lock(&(cb->mutex)); if (buf_IsEmpty(cb))printf("\nConsumer (buf_Read) is waiting because of empty buffer\n"); while(buf_IsEmpty(cb)){ pthread_cond_wait(&(cb->buffer_full),&(cb->mutex)); } for (i=0;i<BUFFER_SIZE-1;i++){ printf("\n"); if (cb->start == cb->end) break; out[i] = cb->buf[cb->start]; cb->buf[cb->start] = '_'; cb->start = (cb->start + 1) % cb->size; printf("%c [OUTPUT]",out[i]); } pthread_cond_signal(&(cb->buffer_empty)); pthread_mutex_unlock(&(cb->mutex)); return 0; } void * client(void *cb){ pthread_detach(pthread_self()); struct cBuf *myData; myData = (struct cBuf*) cb; char input[]="Hello World!"; if (buf_Insert(myData, input)){ //succes on return 0 printf("\n"); } return 0; } int main(void) { char out[60]; pthread_t thread; int i; /* Initialise conditioners*/ pthread_cond_init(&(cb.buffer_full),NULL); pthread_cond_init(&(cb.buffer_empty),NULL); buf_Init(&cb, BUFFER_SIZE); for (i = 0; i<NUM_THREADS; i++){ if(pthread_create (&thread,NULL, client, (void *) &cb) !=0){ } else { } } while (1){ if (buf_Read(&cb,out)){ } } //empty the buffer; free the allocated memory buf_Free(&cb); return 0; }
I already explained in comment in Producer/consumer seems to be in deadlock when buffer is smaller than input from producer, but those are comments, so here goes as answer: You should never ever have partial message in the queue. Make sure you never write one. You can check whether there is enough space before starting to write the message and wait for buffer_empty straight away if there's not, or you can change the queue to send shared pointers to allocated data (either pass ownership to consumer or reference-counted) or something, so each message only takes up one slot in the queue and allocated memory for the rest. What's best will depend on the exact nature of your message. Anything will do as long as there are no partial messages. While it would be possible to record which particular writer needs to finish a message and wake just that, it would be awfully complicated. Synchronization is hard as it is, don't make it any harder by placing additional requirements on it. In fact unless this is a homework (in a sense you do it to learn how synchronization works), just look for ready-made message queues. The SysV-IPC ones or unix-domain sockets in datagram mode are two options that come to mind, or look for some library that does.