gdb debugging - only \n inside stdin - c

I'm debugging my program in windows with GDB and need stdin.
So, I compiled it :
gcc -g abstieg2.c
gdb a
break 1
run < graph1.in
But the stdin has only \n in it!
do{
getline(&line,&size,stdin);
} while(!strcmp("\n",line)); // for testing, gets stuck forever, but only with gdb
I really can't see what more can be relevant to this problem.
Thanks
stripped down version of my code:
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <limits.h>
void *errorchecked_malloc(unsigned int);
unsigned int correctinput(char**);
void free_us();
void my_qsort(int , int r);
unsigned int *Dijkstra(int , int , unsigned int , int );
unsigned int min_distance(unsigned int* , int );
int linearsearch(int);
char* strerror(int);
unsigned int *start_edge=NULL; // Input of edges is organised into three array
unsigned int *end_edge=NULL;
unsigned int *length_edge=NULL;
unsigned int *base_camp=NULL ; // Input of basecamps
unsigned int *Dijkstra_dist=NULL; // distance value of dijkstra algorithm
unsigned int *Dijkstra_vis=NULL; // visited by dijkstra algorithm?
unsigned int *Dijkstra_predec=NULL ; // predecessor of dijkstra algorithm
unsigned int* found=NULL;
int * pos_in_vertices;
char* store_for_reset= NULL; // line (input) char* wil be changed, in store_for_reset line will be saved
int basecamp_length=0;
/*Aim: Find the shortest way in a graph from start_vertices to finish_vertices
* restrictions:
* -2 Day travel with max_distance per day
* -after one day a basecamp must be reached( or the finish vertices)
* -N vertices <=N
*
* Input is organized as following:
* start_vertices finish_vertices d_max\n //
* start_vertices end_vertices max_distance\n // for each edge
* ...
* base_camp\n
* base_camp\n
* ...
*/
int main( ) {
int N=0; // count of vertices
int arg_line_count=0; //
unsigned int start; // First three Input of start basecamp
unsigned int end; // finish basecamp
unsigned int d_max; // maximum travel distance per day
char* line = (char *)errorchecked_malloc(36*sizeof(char));
store_for_reset= line;
int size =strlen(line);
//Input configuration
do{
getline(&line,&size,stdin);
printf("%s",line);
} while(!strcmp("\n",line));// for testing, gets stuck forever, but only with gdb
start_edge = (int*)errorchecked_malloc(sizeof(int)*1000); // creating space, TODO dynamic space allocation
end_edge = (int*)errorchecked_malloc(sizeof(int)*1000);
length_edge = (int*)errorchecked_malloc(sizeof(int)*1000);
start = correctinput(&line); // first line input
end = correctinput(&line);
d_max = correctinput(&line);
// input of all edges
for(int i=0;fgets(line,size,stdin);i++){ // fgets returns NULL if stdin empty
printf("Zeile %d \n", i);
start_edge[i]=correctinput(&line);
printf("line: %s", line);
if(line[0]=='\0'){ // end of line, means now are only basecamps left
base_camp[0]=start_edge[i];
start_edge[i]=0;
basecamp_length=1;
break;
}
if(start_edge[i]>N) N= start_edge[i];
end_edge[i]=correctinput(&line);
length_edge[i]=correctinput(&line);
line =store_for_reset;
}
// Input of basecamps
base_camp= (int*)errorchecked_malloc(sizeof(int)*N); // generous, N is maximum of Nodes
for(int i=1;fgets(line,size,stdin);i++){
base_camp[i]=correctinput(&line);
if(line!=NULL){
printf("fatal error:Too many arguments per line while reading \"Basislagern\" input");
free_us();
exit(-1);
}
basecamp_length++;
}
free_us();
}
unsigned int correctinput( char** string){
char* test;
unsigned int tmp =strtol(*string,&test,10);
if((test[0]!=' ' && test[0]!= '\n' ) || errno != 0 ) {
printf("Don't mock me! Please use the correct input format. \n Information: ");
strerror(errno);
printf(" Next Character: \'%d\'", atoi(test));
free_us();
exit(-1);
}
//printf("test: %s, /n",test);
int i;
for(i=0;(*string)[i]>='0' && (*string)[i]<='9';i++){
*string=(*string)+ i*sizeof(char); // moves the input pointer to the next argument( therefore pointer to pointer)
}
if(*string[i]==' ')string++;
return tmp;
}
void free_us(){
free(start_edge);
free(end_edge);
free(length_edge);
free(base_camp);
free(Dijkstra_dist);
free(Dijkstra_vis);
free(pos_in_vertices);
free(Dijkstra_predec);
free(store_for_reset);
free(found);
}
void *errorchecked_malloc(unsigned int size){
void *ptr;
ptr = malloc(size);
if(ptr == NULL) {
fprintf(stderr, "Error: could not allocate heap memory. \n");
free_us();
exit(-1);
}
return ptr;
}
Input looks like this:
0 1 3924456639
0 5 1268156980
0 18 293858388
0 74 142402607
1 4 145988610
....
24
1
27
79
4
70
...

You are probably hitting this Windows specific gdb bug:
https://www.cygwin.com/ml/cygwin/1999-04/msg00308.html
Try to upgrade to a latest version of gdb (8.0 as of now).
Among other things there were some enhancements for debugging on MS-Windows in this release. See in NEWS file:
55 * Native debugging on MS-Windows supports command-line redirection
56
57 Command-line arguments used for starting programs on MS-Windows can
58 now include redirection symbols supported by native Windows shells,
59 such as '<', '>', '>>', '2>&1', etc. This affects GDB commands such
60 as "run", "start", and "set args", as well as the corresponding MI
61 features.

Related

Sum count by grouping according to another column in C

I have a text file of the following form. Left column is names of players and right column is their score in games they played.
john 40
mary 50
john 30
kevin 88
kevin 29
joe 102
david 11
mary 134
I want to sum up the scores of the players. So, I want to print output of the form
john 70
mary 184
kevin 117
joe 102
david 11
I know that this can be easily done in R or Python. But I want to do this using C. So, I try to declare an array of structures in C and try to read each line from the file. struct is defined as a global variable, so by default, the struct members are initialized to zero values or null character in case of char array. Then, I try to read each row into the struct, which itself is an element of the array. But, while implementing this, I got stuck where new rows are to be read and then stored into structs. Is there any efficient way to do this ? Since R or 'pandas are based on C, their underlying code is probably written in C. How is it done there ?
Thanks
Typically you'd read a line, split it up on whitespace, see if an entry with that name already exists in a hash table or tree or other map data structure, and if so, add the current value to it, and if not, insert it using the current value. Then at the end traverse the map printing out the entries. Basically, the same approach you'd take with any language.
However, those other languages often have things like map data structures, high level abstractions for reading files and parsing text, etc., so a task like this can be done in a few lines (Shoot, awk can do it in one). With C, you have to write most of that stuff yourself, or use add-on libraries - the C standard, for example, has no hash table or trees. You basically have to do everything manually that languages like Python are doing for you under the hood.
Here's an example that uses the POSIX binary search tree functions (An awkward but portable API):
#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <search.h>
struct record {
int num;
char name[];
};
struct record *make_record(const char *name, int num) {
size_t len = strlen(name);
struct record *r = malloc(sizeof *r + len + 1);
r->num = num;
memcpy(r->name, name, len);
r->name[len] = 0;
return r;
}
int reccmp(const void *va, const void *vb) {
const struct record *a = va, *b = vb;
return strcmp(a->name, b->name);
}
void print_rec(const void *nodep, VISIT which, int depth) {
(void)depth;
// Print records in sorted order.
if (which == postorder || which == leaf) {
const struct record *r = *(const struct record **)nodep;
printf("%s\t%d\n", r->name, r->num);
}
}
int main(int argc, char **argv) {
if (argc != 2) {
fprintf(stderr, "Usage: %s filename\n", argc > 0 ? argv[0] : "program");
return EXIT_FAILURE;
}
FILE *fp = fopen(argv[1], "r");
if (!fp) {
fprintf(stderr, "%s: Unable to open %s: %s\n", argv[0], argv[1],
strerror(errno));
return EXIT_FAILURE;
}
void *counts = NULL; // Opaque pointer to the root of the tree
int lineno = 0;
char *line = NULL;
size_t line_len = 0;
while (getline(&line, &line_len, fp) > 0) {
lineno += 1;
char *saveptr = NULL;
char *name = strtok_r(line, " ", &saveptr);
char *numstr = strtok_r(NULL, " ", &saveptr);
if (!name || !*name || !numstr || !*numstr) {
fprintf(stderr, "Line %d of input is malformed!\n", lineno);
continue;
}
int num = atoi(numstr);
struct record *new_rec = make_record(name, num);
// tsearch() either inserts a new node and returns a pointer to it,
// or returns a pointer to an existing matching node.
struct record *found_rec =
*(struct record **)tsearch(new_rec, &counts, reccmp);
if (new_rec != found_rec) {
// If it's the latter, update its number sum and free the struct used
// to look it up.
found_rec->num += num;
free(new_rec);
}
}
free(line);
fclose(fp);
twalk(counts, print_rec);
#ifdef __GLIBC__
// Prevent spurious warnings from tools like ASan and valgrind about
// memory leaks.
tdestroy(counts, free);
#endif
return 0;
}
Example usage:
$ gcc -g -O -Wall -Wextra group.c
$ ./a.out input.txt
david 11
joe 102
john 70
kevin 117
mary 184

How can I resolve the collision in the hashing in this code I did? Currently cannot search for NG CHEA YEAT's ID only

I have the following text file
1171203258:HOSSAIN, MARUF
1181202660:KUHAN RAJ A/L TAMIL CHEL WAM
1181203465:PONG KAI SUN
1191102443:FAIZA OSAMA ABDALLA HASHIM
1201302289:LEE JIA WEI
1201302368:SHEIKH, AHNAF AZMAIN
1201100584:HI CHIA LING
1201101509:NG CHEA YEAT
1191103201:PHUAH CHEE HAOU
1201100879:MOSTAFA ARABY MADBOULY AHMED
1191103215:TONG JUN YANG
1191103119:ANG QIZHENG
1171302286:DARWIN KUMAR A/L MUNIAN
1181101192:HAIZUN NAJWA BINTI MOHD RIFIN
1201100926:NG XUE NIE
1191302417:ALMARHOON, ALI HUSSAIN A
1201100225:HEMAN RAO A/L SUBRAMANIAM
1181100823:LIM ZHEN BANG
1161202587:SOHEIL PRAKASAN SUPPAN
1201100603:AVINASH MURALI
1181101858:CHEAH KOK YEW
1191103071:GAN WEI TONG
1201100301:KEVIN THAM ZHENG YIT
1201100648:LIM CHER AIK
1201302222:SHIVAA RUTRAN A/L NAGATHEESAN
1201100779:TAN WEI XIANG
1191100919:WONG HONG WEI
The code I have for now, work well but have collision in the hashing I think
Here is what I have so far:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define MDIR 27 //size of list
#define MBUFF 256
#define MHASH 109 //hash function is %109
#define MNAME 40
struct List{
char name[40];
int studID;
};
//function prototype
int comparator(const void* p, const void* q){
return strcmp(((struct List*)p)->name,((struct List*)q)->name);
}
int readData(struct List dir[]);
int hashfunc(char *name);
void hash(struct List dir[], int ndir,
int hashtable[]);
int search(char *key,
struct List s[], int hashtable[]);
//main function
int main(){
int ndir, result, hashtable[MHASH];
int count;
int i;
int j;
struct List s[27];
char temp[27];
char query[40];
FILE *fptr;
fptr = fopen("rec.txt", "r+");
if (fptr != NULL) {
printf("File created successfully!\n");
}
else {
printf("Failed to create the file.\n");
// exit status for OS that an error occurred
return -1;
}
for(count = 0; count < 27; count++){
fscanf(fptr,"%d", &s[count].studID);
fgets(s[count].name,40,fptr);
}
qsort
qsort(s,27,sizeof(struct List),comparator);
printing the sorted name then continue the hashing of searching
//printing sorted name
printf("Sorted Names\n");
for(i=0;i<27;i++){
printf("%d%s\n", i+1, s[i].name);
}
fclose(fptr);
hashing of searching part
ndir=readData(s);
hash(s,ndir,hashtable);
puts("\nName to search>>");
fgets(query,MNAME-1,stdin);
query[strlen(query)-1]='\0';
result=search(query,s,hashtable);
if(result==-1)
printf("Not Found");
else
printf("%s's ID is %d\n",
s[result].name, s[result].studID);
return 0;
}
read function
int readData(struct List dir[]){
FILE *fdir=fopen("rec.txt","r");
char buff[MBUFF];
int i=0;
while(i<MDIR && fgets(buff,MBUFF-1,fdir)){
dir[i].studID=atol(strtok(buff,":"));
strcpy(dir[i].name,strtok(NULL, "\n"));
i++;
}
return(i);
}
hash function
int hashfunc(char *name){
long sum=0;
int k=0;
while(name[k]){
sum+=name[k];
k++;
}
return( (int) (sum % MHASH) );
}
hash function
void hash(struct List dir[], int ndir,
int hashtable[]){
int k;
int index;
for(k=0;k<ndir;k++){
index = hashfunc(dir[k].name);
hashtable[index]=k;
}
}
search function
int search(char *key, struct List dir[],
int hashtable[]){
int index=hashfunc(key);
int k=hashtable[index];
if(strcmp(key,dir[k].name)==0)
return(k);
else
return(-1);
}
I am not sure for the hashing of searching part
Whenever faced with a need to separate fields in a line of data, the normal approach is to read an entire line of data as a string into a buffer (character array). Then you separate what you need from the buffer using whatever method fits the data the best. Either using a pair of pointers to bracket the text you need and then copying the characters between the pointers. You can automate the process using string functions like strchr() to locate the ':' in the buffer. You can also use string functions like strtok() to split the buffer into tokens on any given set of delimiters.
However here there is an even simpler method. Since you have a fixed format for the studID and name in the line, you can simply use sscanf(), e.g.
#include <stdio.h>
#include <stdlib.h>
#define MXSTUD 30 /* if you need a constant, #define one (or more) */
#define MXNAME 40
typedef struct list { /* adding typedef for convenience */
char name[MXNAME];
unsigned studID;
} list;
...
int main (int argc, char **argv) {
int count = 0; /* count of students */
char buf[MXNAME * 2]; /* temprorary storage for line */
list s[MXSTUD] = {{ .name = "" }}; /* list array initialized all 0 */
/* open filename given as 1st argument or "rec.text" if none given */
FILE *fptr = fopen (argc > 1 ? argv[1] : "rec.text", "r");
if (!fptr) { /* validate file open for reading */
fputs ("error: file open failed\n", stderr);
return 1;
}
while (fgets (buf, sizeof buf, fptr)) { /* read each line into buf */
/* separate studID and name using sscanf() */
if (sscanf (buf, "%u:%39[^\n]", &s[count].studID, s[count].name) == 2) {
count += 1; /* increment count on success */
}
}
...
That's all that is needed to read each line of data and separate the line into studID and name storing each in an element of the list array of struct.
Use qsort() For Sorting
Regardless of whether you have an array or allocated block of memory containing objects, qsort() provides a simple and efficient way to sort it. All you need to do is write a compare() function telling qsort() how to compare the elements. The declaration for the qsort() compare function is:
int compare (const void *a, const void *b);`
Where a and b are simple pointers-to elements of your array to be compared. So when writing the function, all you need to do is cast a and b to the proper type and write the logic to compare whatever you like in the two elements. A negative return means a sorts before b and a positive return means b sorts before a. A zero return means the elements are equal.
Casting the a and b to type const list * (you include const since the data isn't modified which allows the compiler freedom to optimize more fully), you simply loop over each name comparing characters and returning when two characters differ or the end of file is reached. Here, to sort your s[] array by name you can do:
/* qsort compare function lexagraphically sorts words */
int compare (const void *a, const void *b)
{
/* a & b are pointers to adjacent list elements, (pointers to list) */
const list *sa = (const list *)a,
*sb = (const list *)b;
const char *na = sa->name, /* pointers to name in each element */
*nb = sb->name;
/* loop advancing a character in each word per-iteration */
for (;; na++, nb++) {
/* if characters differ or at end of either */
if (*na != *nb || !*na)
break;
}
return (*na > *nb) - (*na < *nb); /* return sort order */
}
Then to sort your array of list (your s[] array) with qsort(), all that is needed is:
qsort (s, count, sizeof *s, compare); /* sort array by name */
Putting it all together in a short program that reads from the filename given as the first argument to the program (or from "rec.text" by default if no argument is given), you can do:
#include <stdio.h>
#include <stdlib.h>
#define MXSTUD 30 /* if you need a constant, #define one (or more) */
#define MXNAME 40
typedef struct list { /* adding typedef for convenience */
char name[MXNAME];
unsigned studID;
} list;
/* qsort compare function lexagraphically sorts words */
int compare (const void *a, const void *b)
{
/* a & b are pointers to adjacent list elements, (pointers to list) */
const list *sa = (const list *)a,
*sb = (const list *)b;
const char *na = sa->name, /* pointers to name in each element */
*nb = sb->name;
/* loop advancing a character in each word per-iteration */
for (;; na++, nb++) {
/* if characters differ or at end of either */
if (*na != *nb || !*na)
break;
}
return (*na > *nb) - (*na < *nb); /* return sort order */
}
int main (int argc, char **argv) {
int count = 0; /* count of students */
char buf[MXNAME * 2]; /* temprorary storage for line */
list s[MXSTUD] = {{ .name = "" }}; /* list array initialized all 0 */
/* open filename given as 1st argument or "rec.text" if none given */
FILE *fptr = fopen (argc > 1 ? argv[1] : "rec.text", "r");
if (!fptr) { /* validate file open for reading */
fputs ("error: file open failed\n", stderr);
return 1;
}
while (fgets (buf, sizeof buf, fptr)) { /* read each line into buf */
/* separate studID and name using sscanf() */
if (sscanf (buf, "%u:%39[^\n]", &s[count].studID, s[count].name) == 2) {
count += 1; /* increment count on success */
}
}
qsort (s, count, sizeof *s, compare); /* sort array by name */
for (int i = 0; i < count; i++) { /* output results */
printf ("%2d %10u %s\n", i + 1, s[i].studID, s[i].name);
}
}
(note: you simply need to open the file in read mode "r")
Example Use/Output
With your data in a file named dat/studIDlist.txt, for the 27 students in your data you would get:
$ ./bin/studIDlist dat/studIDlist.txt
1 1191302417 ALMARHOON, ALI HUSSAIN A
2 1191103119 ANG QIZHENG
3 1201100603 AVINASH MURALI
4 1181101858 CHEAH KOK YEW
5 1171302286 DARWIN KUMAR A/L MUNIAN
6 1191102443 FAIZA OSAMA ABDALLA HASHIM
7 1191103071 GAN WEI TONG
8 1181101192 HAIZUN NAJWA BINTI MOHD RIFIN
9 1201100225 HEMAN RAO A/L SUBRAMANIAM
10 1201100584 HI CHIA LING
11 1171203258 HOSSAIN, MARUF
12 1201100301 KEVIN THAM ZHENG YIT
13 1181202660 KUHAN RAJ A/L TAMIL CHEL WAM
14 1201302289 LEE JIA WEI
15 1201100648 LIM CHER AIK
16 1181100823 LIM ZHEN BANG
17 1201100879 MOSTAFA ARABY MADBOULY AHMED
18 1201101509 NG CHEA YEAT
19 1201100926 NG XUE NIE
20 1191103201 PHUAH CHEE HAOU
21 1181203465 PONG KAI SUN
22 1201302368 SHEIKH, AHNAF AZMAIN
23 1201302222 SHIVAA RUTRAN A/L NAGATHEESAN
24 1161202587 SOHEIL PRAKASAN SUPPAN
25 1201100779 TAN WEI XIANG
26 1191103215 TONG JUN YANG
27 1191100919 WONG HONG WEI
You will have to get line by line your file and store it in an array.
FILE *fp = fopen("lorem.txt", "r");
if(fp == NULL) {
perror("Unable to open file!");
exit(1);
}
char chunk[128];
while(fgets(chunk, sizeof(chunk), fp) != NULL) {
fputs(chunk, stdout);
fputs("|*\n", stdout); // marker string used to show where the content the chunk array has ended
}
fclose(fp);
To split each line use strtok() function:
char *token = strtok(line, ":"); // To separate the first block from the second like seen on your image.
char *token[1] = strtok(token, ","); // To separate the other part

DES CBC mode not outputting correctly

I am working on a project in C to implement CBC mode on top of a skeleton code for DES with OpenSSL. We are not allowed to use a function that does the CBC mode automatically, in the sense that we must implement it ourselves. I am getting output but I have result files and my output is not matching up completely with the intended results. I also am stuck on figuring out how to pad the file to ensure all the blocks are of equal size, which is probably one of the reasons why I'm not receiving the correct output. Any help would be appreciated. Here's my modification of the skeleton code so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <openssl/des.h>
#include <sys/time.h>
#include <unistd.h>
#define ENC 1
#define DEC 0
DES_key_schedule key;
int append(char*s, size_t size, char c) {
if(strlen(s) + 1 >= size) {
return 1;
}
int len = strlen(s);
s[len] = c;
s[len+1] = '\0';
return 0;
}
int getSize (char * s) {
char * t;
for (t = s; *t != '\0'; t++)
;
return t - s;
}
void strToHex(const_DES_cblock input, unsigned char *output) {
int arSize = 8;
unsigned int byte;
for(int i=0; i<arSize; i++) {
if(sscanf(input, "%2x", &byte) != 1) {
break;
}
output[i] = byte;
input += 2;
}
}
void doBitwiseXor(DES_LONG *xorValue, DES_LONG* data, const_DES_cblock roundOutput) {
DES_LONG temp[2];
memcpy(temp, roundOutput, 8*sizeof(unsigned char));
for(int i=0; i<2; i++) {
xorValue[i] = temp[i] ^ data[i];
}
}
void doCBCenc(DES_LONG *data, const_DES_cblock roundOutput, FILE *outFile) {
DES_LONG in[2];
doBitwiseXor(in, data, roundOutput);
DES_encrypt1(in,&key,ENC);
printf("ENCRYPTED\n");
printvalueOfDES_LONG(in);
printf("%s","\n");
fwrite(in, 8, 1, outFile);
memcpy(roundOutput, in, 2*sizeof(DES_LONG));
}
int main(int argc, char** argv)
{
const_DES_cblock cbc_key = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef};
const_DES_cblock IV = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef};
// Initialize the timing function
struct timeval start, end;
gettimeofday(&start, NULL);
int l;
if ((l = DES_set_key_checked(&cbc_key,&key)) != 0)
printf("\nkey error\n");
FILE *inpFile;
FILE *outFile;
inpFile = fopen("test.txt", "r");
outFile = fopen("test_results.txt", "wb");
if(inpFile && outFile) {
unsigned char ch;
// A char array that will hold all 8 ch values.
// each ch value is appended to this.
unsigned char eight_bits[8];
// counter for the loop that ensures that only 8 chars are done at a time.
int count = 0;
while(!feof(inpFile)) {
// read in a character
ch = fgetc(inpFile);
// print the character
printf("%c",ch);
// append the character to eight_bits
append(eight_bits,1,ch);
// increment the count so that we only go to 8.
count++;
const_DES_cblock roundOutput;
// When count gets to 8
if(count == 8) {
// for formatting
printf("%s","\n");
// Encrypt the eight characters and store them back in the char array.
//DES_encrypt1(eight_bits,&key,ENC);
doCBCenc(eight_bits, roundOutput, outFile);
// prints out the encrypted string
int k;
for(k = 0; k < getSize(eight_bits); k++){
printf("%c", eight_bits[k]);
}
// Sets count back to 0 so that we can do another 8 characters.
count = 0;
// so we just do the first 8. When everything works REMOVE THE BREAK.
//break;
}
}
} else {
printf("Error in opening file\n");
}
fclose(inpFile);
fclose(outFile);
// End the timing
gettimeofday(&end, NULL);
// Initialize seconds and micros to hold values for the time output
long seconds = (end.tv_sec - start.tv_sec);
long micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
// Output the time
printf("The elapsed time is %d seconds and %d microseconds\n", seconds, micros);
}
Your crypto is at least half correct, but you have a lot of actual or potential other errors.
As you identified, raw CBC mode can only encrypt data which is a multiple of the block size, for DES 64 bits or 8 bytes (on most modern computers and all where you could use OpenSSL). In some applications this is okay; for example if the data is (always) an MD5 or SHA-256 or SHA-512 hash, or a GUID, or an IPv6 (binary) address, then it is a block multiple. But most applications want to handle at least any length in bytes, so they need to use some scheme to pad on encrypt and unpad on decrypt the last block (all blocks before the last already have the correct size). Many different schemes have been developed for this, so you need to know which to use. I assume this is a school assignment (since no real customer would set such a stupid and wasteful combination of requirements) and this should either have been specified or clearly left as a choice. One padding scheme very common today (although not for single-DES, because that is broken, unsafe, obsolete, and not common) is the one defined by PKCS5 and generalized by PKCS7 and variously called PKCS5, PKCS7, or PKCS5/7 padding, so I used that as an example.
Other than that:
you try to test feof(inpFile) before doing fgetc(inpFile). This doesn't work in C. It results in your code treating the low 8 bits of EOF (255 aka 0xFF on practically all implementations) as a valid data character added to the characters that were actually in the file. The common idiom is to store the return of getchar/getc/fgetc in a signed int and compare to EOF, but that would have required more changes so I used an alternate.
you don't initialize eight_bits which is a local-scope automatic duration variable, so its contents are undefined and depending on the implementation are often garbage, which means trying to 'append' to it by using strlen() to look for the end won't work right and might even crash. Although on some implementations at least some times it might happen to contain zero bytes, and 'work'. In addition it is possible in C for a byte read from a file (and stored here) to be \0 which will also make this work wrong, although if this file contains text, as its name suggests, it probably doesn't contain any \0 bytes.
once you fill eight_bits you write 'off-the-end' into element [8] which doesn't exist. Technically this is Undefined Behavior and anything at all can happen, traditionally expressed on Usenet as nasal demons. Plus after main finishes the first block it doesn't change anything in eight_bits so all further calls to append find it full and discard the new character.
while you could fix the above points separately, a much simple solution is available: you are already using count to count the number of bytes in the current block, so just use it as the subscript.
roundOutput is also an uninitialized local/auto variable within the loop, which is then used as the previous block for the CBC step, possibly with garbage or wrong value(s). And you don't use the IV at all, as is needed. You should allocate this before the loop (so it retains its value through all iterations) and initialize it to the IV, and then for each block in the loop your doCBCenc can properly XOR it to the new block and then leave the encrypted new block to be used next time.
your code labelled 'prints out the encrypted string' prints plaintext not ciphertext -- which is binary and shouldn't be printed directly anyway -- and is not needed because your file-read loop already echoes each character read. But if you do want to print a (validly null-terminated) string it's easier to just use fputs(s) or [f]printf([f,]"%s",s) or even fwrite(s,1,strlen(s),f).
your doCBCenc has a reference to printvalueofDES_LONG which isn't defined anywhere, and which along with two surrounding printf is clearly not needed.
you should use a cast to convert the first argument to doCBCenc -- this isn't strictly required but is good style and a good compiler (like mine) complains if you don't
finally, when an error occurs you usually print a message but then continue running, which will never work right and may produce symptoms that disguise the problem and make it hard to fix.
The below code fixes the above except that last (which would have been more work for less benefit) plus I removed routines that are now superfluous, and the timing code which is just silly: Unix already has builtin tools to measure and display process time more easily and reliably than writing code. Code I 'removed' is under #if 0 for reference, and code I added under #else or #if 1 except for the cast. The logic for PKCS5/7 padding is under #if MAYBE so it can be either selected or not. Some consider it better style to use sizeof(DES_block) or define a macro instead of the magic 8's, but I didn't bother -- especially since it would have required changes that aren't really necessary.
// SO70209636
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <openssl/des.h>
#include <sys/time.h>
#include <unistd.h>
#define ENC 1
#define DEC 0
DES_key_schedule key;
#if 0
int append(char*s, size_t size, char c) {
if(strlen(s) + 1 >= size) {
return 1;
}
int len = strlen(s);
s[len] = c;
s[len+1] = '\0';
return 0;
}
int getSize (char * s) {
char * t;
for (t = s; *t != '\0'; t++)
;
return t - s;
}
void strToHex(const_DES_cblock input, unsigned char *output) {
int arSize = 8;
unsigned int byte;
for(int i=0; i<arSize; i++) {
if(sscanf(input, "%2x", &byte) != 1) {
break;
}
output[i] = byte;
input += 2;
}
}
#endif
void doBitwiseXor(DES_LONG *xorValue, DES_LONG* data, const_DES_cblock roundOutput) {
DES_LONG temp[2];
memcpy(temp, roundOutput, 8*sizeof(unsigned char));
for(int i=0; i<2; i++) {
xorValue[i] = temp[i] ^ data[i];
}
}
void doCBCenc(DES_LONG *data, const_DES_cblock roundOutput, FILE *outFile) {
DES_LONG in[2];
doBitwiseXor(in, data, roundOutput);
DES_encrypt1(in,&key,ENC);
#if 0
printf("ENCRYPTED\n");
printvalueOfDES_LONG(in);
printf("%s","\n");
#endif
fwrite(in, 8, 1, outFile);
memcpy(roundOutput, in, 2*sizeof(DES_LONG));
}
int main(int argc, char** argv)
{
const_DES_cblock cbc_key = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef};
const_DES_cblock IV = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef};
#if 0
// Initialize the timing function
struct timeval start, end;
gettimeofday(&start, NULL);
#endif
int l;
if ((l = DES_set_key_checked(&cbc_key,&key)) != 0)
printf("\nkey error\n");
#if 1
DES_cblock roundOutput; // must be outside the loop
memcpy (roundOutput, IV, 8); // and initialized
#endif
FILE *inpFile;
FILE *outFile;
inpFile = fopen("test.txt", "r");
outFile = fopen("test.encrypt", "wb");
if(inpFile && outFile) {
unsigned char ch;
// A char array that will hold all 8 ch values.
// each ch value is appended to this.
unsigned char eight_bits[8];
// counter for the loop that ensures that only 8 chars are done at a time.
int count = 0;
#if 0
while(!feof(inpFile)) {
// read in a character
ch = fgetc(inpFile);
#else
while( ch = fgetc(inpFile), !feof(inpFile) ){
#endif
// print the character
printf("%c",ch);
#if 0
// append the character to eight_bits
append(eight_bits,1,ch);
// increment the count so that we only go to 8.
count++;
#else
eight_bits[count++] = ch;
#endif
#if 0
const_DES_cblock roundOutput;
#endif
// When count gets to 8
if(count == 8) {
// for formatting
printf("%s","\n");
// Encrypt the eight characters and store them back in the char array.
//DES_encrypt1(eight_bits,&key,ENC);
doCBCenc((DES_LONG*)eight_bits, roundOutput, outFile);
#if 0
// prints out the encrypted string
int k;
for(k = 0; k < getSize(eight_bits); k++){
printf("%c", eight_bits[k]);
}
#endif
// Sets count back to 0 so that we can do another 8 characters.
count = 0;
// so we just do the first 8. When everything works REMOVE THE BREAK.
//break;
}
}
#if MAYBE
memset (eight_bits+count, 8-count, 8-count); // PKCS5/7 padding
doCBCenc((DES_LONG*)eight_bits, roundOutput, outFile);
#endif
} else {
printf("Error in opening file\n");
}
fclose(inpFile);
fclose(outFile);
#if 0
// End the timing
gettimeofday(&end, NULL);
// Initialize seconds and micros to hold values for the time output
long seconds = (end.tv_sec - start.tv_sec);
long micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
// Output the time
printf("The elapsed time is %d seconds and %d microseconds\n", seconds, micros);
#endif
}
PS: personally I wouldn't put the fwrite in doCBCenc; I would only do the encryption and let the caller do whatever I/O is appropriate which might in some cases not be fwrite. But what you have is not wrong for the requirements you apparently have.

Quick-sorting with multiple threads leaves last 1/6 unsorted

My goal is to create a program that takes a large list of unsorted integers (1-10 million) and divides it into 6 parts where a thread concurrently sorts it. After sorting I merge it into one sorted array so I can find the median and mode quicker.
The input file will be something like this:
# 1000000
314
267
213
934
where the number following the # identifies the number of integers in the list.
Currently I can sort perfect and quickly without threading however when I began threading I ran into an issue. For a 1,000,000 data set it only sorts the first 833,333 integers leaving the last 166,666 (1/6) unsorted.
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#define BUF_SIZE 1024
int sum; /* this data will be shared by the thread(s) */
int * bigArr;
int size;
int findMedian(int array[], int size)
{
if (size % 2 != 0)
return array[size / 2];
return (array[(size - 1) / 2] + array[size / 2]) / 2;
}
/*compare function for quicksort*/
int _comp(const void* a, const void* b) {
return ( *(int*)a - *(int*)b);
}
/*This function is the problem method*/
/*indicate range of array to be processed with the index(params)*/
void *threadFct(int param)
{
int x= size/6;
if(param==0)x= size/6;
if(param>0&&param<5)x= (size/6)*param;
if(param==5)x= (size/6)*param+ (size%size/6);/*pass remainder into last thread*/
qsort((void*)bigArr, x, sizeof(bigArr[param]), _comp);
pthread_exit(0);
}
int main(int argc, char *argv[])
{
FILE *source;
int i =0;
char buffer[BUF_SIZE];
if(argc!=2){
printf("Error. please enter ./a followed by the file name");
return -1;}
source= fopen(argv[1], "r");
if (source == NULL) { /*reading error msg*/
printf("Error. File not found.");
return 1;
}
int count= 0;
while (!feof (source)) {
if (fgets(buffer, sizeof (buffer), source)) {
if(count==0){ /*Convert string to int using atoi*/
char str[1];
sprintf(str, "%c%c%c%c%c%c%c%c%c",buffer[2],buffer[3],buffer[4],buffer[5],buffer[6],buffer[7],buffer[8],buffer[9],buffer[10]);/*get string of first */
size= atoi(str); /* read the size of file--> FIRST LINE of file*/
printf("SIZE: %d\n",size);
bigArr= malloc(size*sizeof(int));
}
else{
//printf("[%d]= %s\n",count-1, buffer); /*reads in the rest of the file*/
bigArr[count-1]= atoi(buffer);
}
count++;
}
}
/*thread the unsorted array*/
pthread_t tid[6]; /* the thread identifier */
pthread_attr_t attr; /* set of thread attributes */
// qsort((void*)bigArr, size, sizeof(bigArr[0]), _comp); <---- sorts array without threading
for(i=0; i<6;i++){
pthread_create(&tid[i], NULL, &threadFct, i);
pthread_join(tid[i], NULL);
}
printf("Sorted array:\n");
for(i=0; i<size;i++){
printf("%i \n",bigArr[i]);
}
fclose(source);
}
So to clarify the problem function is in my threadFct().
To explain what the function is doing, the param(thread number) identifies which chunk of the array to quicksort. I divide the size into 6 parts and because the it is even, the remainder of the numbers go into the last chunk. So for example, 1,000,000 integers I would have the first 5/6 sort 166,666 each and the last 1/6 would sort the remainder (166670).
I am aware that
Multi-threading will not speed up much at all even for 10 million integers
This is not the most efficient way to find the median/mode
Thanks for reading this and any help is received with gratitude.
You're sorting the beginning of the array in every call to qsort. You're only changing the number of elements that each thread sorts, by setting x. You're also setting x to the same value in threads 0 and 1.
You need to calculate an offset into the array for each thread, which is just size/6 * param. The number of elements will be size/6 except for the last chunk, which uses a modulus to get the remainder.
As mentioned in the comments, the argument to the thread function should be a pointer, not int. You can hide an integer in the pointer, but you need to use explicit casts.
void *threadFct(void* param_ptr)
{
int param = (int)param_ptr;
int start = size/6 * param;
int length;
if (param < 5) {
length = size/6;
} else {
length = size - 5 * (size/6);
}
qsort((void*)(bigArr+start), length, sizeof(*bigArr), _comp);
pthread_exit(0);
}
and later
pthread_create(&tid[i], NULL, &threadFct, (void*)i);

Error trying to compile program from instructor (symbols expected in define line)

I was given a template from my instructor and then modified the code to frequency histogram of a given text. However i'm getting errors trying to compile the code. I believe the errors indicated by the compiler is at the beginning of the code. The errors have been taken a screenshot of and attached below. Thanks in advance
errors:
test.c:6:25: error: expected declaration specifiers or '...' before '\x20'
#define FIRST_PRINTABLE ' ' // Space character code 32, see Etter 2016 text, pp. 418-420
^
test.c:8:30: note: in expansion of macro 'FIRST_PRINTABLE'
#define NUM_PRINTABLE (int) (FIRST_PRINTABLE-LAST_PRINTABLE+1)
^~~~~~~~~~~~~~~
test.c:11:38: note: in expansion of macro 'NUM_PRINTABLE'
void init_array(int histogram[], int NUM_PRINTABLE);
^~~~~~~~~~~~~
code:
/*
* Comp120 - Lab 7: Starter project -- Complete this code
* Character Frequency analysis -- read a text file and display frequency
* analysis
* for all printable characters.
*
* Author: J. Fall
* Date: Feb. 2017
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
// Definition of printable character set
#define FIRST_PRINTABLE ' ' // Space character code 32, see Etter 2016 text, pp. 418-420
#define LAST_PRINTABLE '~'
#define NUM_PRINTABLE (int) (FIRST_PRINTABLE-LAST_PRINTABLE+1)
// Function prototypes:
void init_array(int histogram[], int NUM_PRINTABLE);
int sum_array(const int histogram[], int NUM_PRINTABLE);
bool isPrintable(char c);
void compute_frequency(char* filename, int histogram[], int NUM_PRINTABLE);
void write_histogram(int histogram[], int NUM_PRINTABLE);
FILE* openFileRead(char* filename);
int main( int argc, char* argv[] )
{
if (argc < 2) {
printf("Usage: freq inputFile \n");
exit(-1);
}
int histogram[NUM_PRINTABLE]; // Array of counters -- one for each printible character
compute_frequency(argv[1], histogram, NUM_PRINTABLE);
write_histogram(histogram, NUM_PRINTABLE);
printf( "Program complete. \n" );
return 0 ;
}
/*
* Initialize the array of integers of given length to all zeros
*/
void init_array(int histogram[], int NUM_PRINTABLE)
{
int i = 0;
for(i=0;i<NUM_PRINTABLE;i++){
histogram[i]=0;
} // TODO: write function to assign 0 too every array element.
}
/*
* Return the sum of all items in the given array
*/
int sum_array(const int array[], int NUM_PRINTABLE)
{
int i = 0;
int sum = 0;
for(i=0;i<NUM_PRINTABLE;i++){
sum = sum + histogram[i];
}
return sum; // TODO: write function to add up every element in the given array.
}
/*
* Return true iff the character is PRINTABLE
*/
bool isPrintable(char c)
{
if (c >= FIRST_PRINTABLE && <= LAST_PRINTABLE){
return true;
}
else
return false; // TODO: write function to return true iff c is a printable character
}
/*
* Compute the frequency histogram for all PRINTABLE characters in the given file
*/
void compute_frequency(char* filename, int histogram[], int NUM_PRINTABLE)
{
FILE* inputFile = openFileRead(filename);
init_array(histogram, NUM_PRINTABLE);
char c = getc(inputFile); // priming read -- read first character from file
while (c != EOF) {
if(isPrintable(c)){
int bin = c - FIRST_PRINTABLE;// TODO: write algorithm to count the number of times character c occurs.
histogram[bin]++;
}
// HINT: since array indexes start at zero, map the ASCII code for each
// printable charcter onto an index by subtracting FIRST_PRINTABLE
// After processing previous character, read next character from file to re-prime the loop
c = getc(inputFile);
}
}
/*
* Write the frequency histogram out to the given file
*/
void write_histogram(int histogram[], int NUM_PRINTABLE)
{
FILE* outputFile = stdout; // Simplifictaion: output is written to the console instead of to an output file.
int total_count = sum_array(histogram, NUM_PRINTABLE);
fprintf(outputFile, "Frequency Analysis Results. Input contained %d printable characters. \n", total_count);
fprintf(outputFile, "Char | Frequency \n");
fprintf(outputFile, "____ | _________ \n");
int i;
for (i=0; i<NUM_PRINTABLE; i++) {
char ch = (char) (i + FIRST_PRINTABLE);
double freq;
if (histogram[i] > 0) {
double freq = histogram[i]/(double)total_count * 100;
fprintf(outputFile, "%3c | %9.3f%% \n", ch, freq);
}
else
fprintf(outputFile, "%3c | %9d%% \n", ch, 0);
}
}
/*
* Attempt to open the file for read access.
* Peforms error check and exits if file is not accessible
*/
FILE* openFileRead(char* filename)
{
FILE* inFile = fopen(filename, "r" );
if( inFile == NULL) {
printf( "Error opening input file %s, program terminating!\n", filename);
exit(-1);
}
return inFile;
}
#define NUM_PRINTABLE (int) (FIRST_PRINTABLE-LAST_PRINTABLE+1)
// Function prototypes:
void init_array(int histogram[], int NUM_PRINTABLE);
when those both lines are expanded by preprocessor it translates as (you can use gcc -E on the source to see it in action):
void init_array(int histogram[], int (int) (' '-'~'+1));
which is obviously a syntax error. Just use NUM_PRINTABLE as a constant in your functions, not as a parameter.
Aside, the macro is functionnaly wrong, it should be
#define NUM_PRINTABLE (LAST_PRINTABLE-FIRST_PRINTABLE+1)
or the value would be negative. (and you don't need to cast to int since character literals are already int)

Resources