I have an archive results.csv and I need to read the first line of this archive and print it out on output.txt. Somehow it's printing random characters after everything and I couldn't figure out what is wrong.
Command: a.c results.csv
First line:
date,home_team,away_team,home_score,away_score,tournament,city,country,neutral
output.txt: date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,(!£,(!£,(!£,(!£,(!£,#,£,(!£,(!£
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
typedef struct
{
char *line1;
char *line1a;
char *line1b;
char *team1;
char *team2;
char *reason;
char *city;
char *country;
char *neutral_field;
}data;
void open_input(char *argv[], FILE **input)
{
if((*input=fopen(argv[1], "r")) == NULL)
{
printf("%s not found\n", argv[1]);
exit(1);
}
}
void open_output(char *string, FILE **output)
{
if((*output=fopen(string, "w")) == NULL)
{
printf("%s not found\n", string);
exit(1);
}
}
void alloc_data(data *d, int size)
{
d->line1 = (char*)malloc(4*sizeof(char));
d->team1 = (char*)malloc(9*sizeof(char));
d->team2 = (char*)malloc(9*sizeof(char));
d->line1a = (char*)malloc(10*sizeof(char));
d->line1b = (char*)malloc(10*sizeof(char));
d->reason = (char*)malloc(10*sizeof(char));
d->city = (char*)malloc(4*sizeof(char));
d->country = (char*)malloc(7*sizeof(char));
d->neutral_field = (char*)malloc(7*sizeof(char));
}
void store(data *d, FILE *input, FILE **output)
{
fscanf(input, "%s,%s,%s,%s,%s,%s,%s,%s,%s", d[0].line1, d[0].team1, d[0].team2, d[0].line1a, d[0].line1b, d[0].reason, d[0].city, d[0].country, d[0].neutral_field );
fprintf(*output, "%s,%s,%s,%s,%s,%s,%s,%s,%s\n", d[0].line1, d[0].team1, d[0].team2, d[0].line1a, d[0].line1b, d[0].reason, d[0].city, d[0].country, d[0].neutral_field );
}
int main(int argc, char *argv[])
{
FILE *input;
FILE *output;
char *string = "output.txt";
int size = 1000;
open_input(argv, &input);
open_output(string, &output);
data *d;
d = (data*)malloc(size*sizeof(data));
alloc_data(d, size);
store(d, input, &output);
free(d);
return 0;
}
fscanf(input, "%s,%s,%s,%s,%s,%s,%s,%s,%s", d[0].line1, d[0].team1,...
The above code tries to read the whole line in to d[0].line1 which causes buffer overflow. team1 and the rest will contain uninitialized data.
You have to change fscanf as follows:
fscanf(input, "%3[^ ,\n\t],%9[^ ,\n\t],...
Where 3 is 4 - 1, and 4 is the size of d[0].line1
Alternatively you can use strtok
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void store(FILE *input, FILE *output)
{
char buf[500];
while(fgets(buf, sizeof(buf), input))
{
//strip end-of-line from `buf`
if(strlen(buf))
if(buf[strlen(buf) - 1] == '\n')
buf[strlen(buf) - 1] = 0;
//tokenize with strtok
char *token = strtok(buf, ",");
while(token)
{
fprintf(output, "%s", token);
token = strtok(NULL, ",");
}
fprintf(output, "\n");
}
}
int main(int argc, char *argv[])
{
FILE *input = fopen("input.txt", "r");
FILE *output = fopen("output.txt", "w");
store(input, output);
return 0;
}
With above code you don't need an additional structure.
If you do use a structure for data, you have to be more careful. It seems you are trying to create an array of 1000 data, but the following only creates one oversized pointer, not an array of data
int size = 1000;
data *d;
d = (data*)malloc(size*sizeof(data));
alloc_data(d, size);
Additionally, for each malloc there should be a corresponding free.
Your buffers aren't big enough to hold the terminating NUL byte. scanf stores that NUL byte (overrunning the buffer), but then the object that really owns that byte may overwrite it, so when printf looks for the NUL it doesn't find it until much later in memory.
The buffer overruns are a bigger problem than what you've seen, who knows what objects those NUL bytes you didn't make space for are smashing? And what happens when you read a data file with slightly different header spelling? Suddenly your hard-coded allocations sizes will be even more wrong than they are already.
Related
Description:
I have created a small program that stores the name and checksum of a file in a struct, for each file in a directory. When output is written to stdout with printf, everything seems fine, but if we write to a file with either fputs or fprintf, values get overwritten, perhaps because of some buffer overflow?
Output from main with print.
Name: 2.txt. Checksum: fc769d448ed4e08bd855927bad2c8e43efdf5315a6daa9f28577758786d52eaf
Name: 1.txt. Checksum: 2d46cffd0302c5537ddb4952a9cca7d66060dafecd56fe3a7fe8e5e5cabbbbf9
Name: 3.txt. Checksum: 37bb2e5563e94eee68fac6b07501c44f018599482e897a626a94dd88053b4b7e
However, if we print the values of checksumMaps[0] to a file,
the value checksumMaps[0].filename gets overwritten (with the last 2 bytes of the checksum string) as seen by:
FILE *fp = fopen("mychecksums.txt", "w");
char formatted_bytes[32*2+1];
char *filename = checksumMaps[0].filename;
format_bytes(formatted_bytes, checksumMaps[0].checksum);
fputs(filename, fp);
fputs(formatted_bytes, fp);
// We print the value of `filename` again in order to see that it has been overwritten.
printf("%s \n", filename);
fclose(fp);
The program writes aftxt to stdout instead of 2.txt.
Using gdb, I can see that the value of filename changes from 2.txt to aftxt after the line fputs(formatted_bytes, fp);. What could be the reason for this?
Minimal Reproducible Example
ArchiveFile.h
typedef struct ArchiveFile{
char *uuid;
char *checksum;
char *relative_path;
int is_binary;
} ArchiveFile;
typedef struct file_content{
unsigned char* bytes;
unsigned long file_size;
} file_content;
void set_uuid(ArchiveFile *file, char* uuid);
char* get_absolute_path(ArchiveFile *file, char* root);
char* get_file_text(ArchiveFile *file, char* root);
void get_bytes(ArchiveFile *file, char* root, unsigned char *buffer, size_t fsize);
long get_file_size(ArchiveFile *file, char *root);
ArchiveFile.c
#include <sys/stat.h>
#include <stdlib.h>
#include <stdio.h>
#include "ArchiveFile.h"
#include <string.h>
void set_uuid(ArchiveFile* file, char* uuid){
file->uuid = uuid;
}
char* get_absolute_path(ArchiveFile *file, char* root){
/* Allocate space according to the relative path +
the root path + null terminating byte.*/
char* absolute_path = malloc(strlen(file->relative_path) + strlen(root) + 1);
// Add the root path.
strcpy(absolute_path, root);
// Concatonate the root with the rest of the path.
strcat(absolute_path, file->relative_path);
return absolute_path;
}
char* get_file_text(ArchiveFile *file, char* root){
char* absolute_path = get_absolute_path(file, root);
FILE *fp = fopen(absolute_path, "r");
if(fp == NULL)
printf("Could not open file %s \n", absolute_path);
// Platform independent way of getting the file size in bytes.
fseek(fp, 0, SEEK_END);
long fsize = ftell(fp);
fseek(fp, 0, SEEK_SET); /* same as rewind(f); */
char *buffer = malloc(fsize);
if(fp){
fread(buffer, sizeof(char), fsize, fp);
}
fclose(fp);
free(absolute_path);
return buffer;
}
void print_bytes2(unsigned char* md, size_t size){
for (size_t i = 0; i < size; i++) {
printf("%02x ", md[i]);
}
printf("\n");
}
void get_bytes(ArchiveFile *file, char *root, unsigned char *buffer, size_t fsize){
char* absolute_path = get_absolute_path(file, root);
FILE *fp = fopen(absolute_path, "rb");
if(fp){
fread(buffer, 1, fsize, fp);
}
free(absolute_path);
fclose(fp);
}
long get_file_size(ArchiveFile *file, char *root){
char* filepath = get_absolute_path(file, root);
FILE *fp = fopen(filepath, "rb");
fseek(fp, 0, SEEK_END);
long fsize = ftell(fp);
fseek(fp, 0, SEEK_SET); /* same as rewind(f); */
free(filepath);
fclose(fp);
return fsize;
}
checksum/checksum.h
// Used to store information about filename and checksum.
typedef struct ChecksumMap{
char* filename;
unsigned char checksum [32];
} ChecksumMap;
int calculate_checksum(void* input, unsigned long length, unsigned char* md);
checksum/checksum.h
#include <stdio.h>
#include <openssl/sha.h>
#include "checksum.h"
int calculate_checksum(void* input, unsigned long length, unsigned char* md){
SHA256_CTX context;
if(!SHA256_Init(&context))
return 0;
if(!SHA256_Update(&context, (unsigned char*)input, length))
return 0;
if(!SHA256_Final(md, &context))
return 0;
return 1;
}
main.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
#include <sys/types.h>
#include "ArchiveFile.h"
#include "checksum/checksum.h"
void format_bytes(char* buffer, unsigned char* md){
for (int i = 0; i < 32; i++) {
sprintf(&buffer[i*2], "%02x", md[i]);
}
buffer[32*2] = '\0';
}
void *listdir(char *name, int count, ChecksumMap *checksumMaps)
{
DIR *dir;
struct dirent *direntry;
if (!(dir = opendir(name)))
return NULL;
while ((direntry = readdir(dir)) != NULL) {
// If we reach a directory (that is not . or ..) then recursive step.
if (direntry->d_type == DT_DIR) {
char path[1024];
if (strcmp(direntry->d_name, ".") == 0 || strcmp(direntry->d_name, "..") == 0)
continue;
snprintf(path, sizeof(path), "%s/%s", name, direntry->d_name);
listdir(path, count, checksumMaps);
} else {
unsigned char md[32];
ArchiveFile file;
file.relative_path = direntry->d_name;
// Get the full path of the file:
char parent_name[strlen(name)+1];
memset(&parent_name[0], 0, sizeof(parent_name));
strcat(parent_name, name);
strcat(parent_name, "/");
size_t fsize = get_file_size(&file, parent_name);
unsigned char *bytes = malloc(sizeof(char) * fsize);
get_bytes(&file, parent_name, bytes, fsize);
calculate_checksum((void*) bytes, fsize, md);
ChecksumMap checksumMap = {.filename=file.relative_path};
memcpy(checksumMap.checksum, md,
sizeof(checksumMap.checksum));
free(bytes);
}
}
closedir(dir);
return NULL;
}
int main(int argc, char const *argv[]) {
FILE *fp = fopen("mychecksums.txt", "w");
char formatted_bytes[32*2+1];
char *filename = checksumMaps[0].filename;
format_bytes(formatted_bytes, checksumMaps[0].checksum);
fputs(filename, fp);
fputs(formatted_bytes, fp);
// We print the value of `filename` again in order to see that it has been overwritten.
printf("%s \n", filename);
fclose(fp);
}
Compile with gcc:
gcc -Wall -Wextra main.c ArchiveFile.c checksum/checksum.c -lcrypto
The program writes aftxt to stdout instead of 2.txt. Using gdb, I can see that the value of filename changes from 2.txt to aftxt after the line fputs(formatted_bytes, fp);. What could be the reason for this?
Hard to say, because we're in the domain of UB (undefined behavior). But there are two obvious candidates here.
formatted_bytes is not properly terminated, causing fputs to read past the array, invoking UB.
fp is not a valid stream. The reason could be that it's not initialized, or changed, or the stream is closed or something.
Enable -Wall -Wextra -fsanitize=address. You could also try -fsanitize=undefined.
Check all return values. malloc, fopen and fputs returns a value that can be used for error checking.
Replace formatted_bytes with a hardcoded string that have the value you think it has.
Learn how to create a Minimal, Reproducible Example and how to debug small c programs. It's a guide I wrote a while ago.
Update
It seems that there was some different problems with the code.
First thing to notice is file.relative_path = direntry->d_name;, however the value that direntry points to changes in each iteration, thus the value file.relative_path points to, also changes. Furthermore, the size of the string stored in file.relative_path has never been specified, which would be a problem, if we use strcpy.
The solution is to specify a size for file.relative_path and use strcpy to copy the value of direntry->d_name. Also, no need for the checksumMap struct, since ArchiveFile already can store the same information (again, specify a size for the checksum).
Thing to keep in mind when you work with strings, buffers, arrays in C:
Remember that strings in C are based on char arrays, themselves based on a pointer to the first element. Assigning the value of one string to another might return in unexpected behavior when you actually want to copy the value of the string, not the address to the first element.
One bug here:
char parent_name[strlen(name)+1];
memset(&parent_name[0], 0, sizeof(parent_name)); // could have been parent_name[0]='\0'; instead
strcat(parent_name, name); // Now the parent_name buffer is full and null terminated.
strcat(parent_name, "/"); // this overwrites the null terminator and writes a new one out-of-bounds
You should have done something like this:
size_t length = strlen(name);
char parent_name[length+1+1];
memcpy(parent_name, name, length); // copies characters only (fast) but not the null term
parent_name[length] = '/'; // append this single character 1 symbol past "name" string
parent_name[length+1] = '\0'; // add manual null termination
I have an archive results.csv and I need to read the first two lines of this archive, split the second one and print them out on output.txt. Somehow it's not printing anything, yet I don't know the reason.
I didn't add the functions that I'm sure are fine.
Command: a.c results.csv
First line:
date,home_team,away_team,home_score,away_score,tournament,city,country,neutral
Second line:
18721130,Scotland,England,0,0,Friendly,Glasgow,Scotland,FALSE
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
typedef struct
{
char *line1;
long int date;
char *h_team;
char *a_team;
int gols_h_team;
int gols_a_team;
char *reason;
char *city;
char *country;
char *neutral_field;
}Data;
void alloc_Data(Data *d, int size)
{
d->line1 = (char*)malloc(50*sizeof(char));
d->h_team = (char*)malloc(30*sizeof(char));
d->a_team = (char*)malloc(30*sizeof(char));
d->reason = (char*)malloc(30*sizeof(char));
d->city = (char*)malloc(30*sizeof(char));
d->country = (char*)malloc(30*sizeof(char));
d->neutral_field = (char*)malloc(9*sizeof(char));
}
void store(Data *d, FILE *input, FILE *output, int size)
{
fscanf(input, "%s", d[0].line1);
fprintf(output, "%s\n", d[0].line1);
for(int i = 1; i < size; i++)
{
fscanf(input, "%li,%[^,]s%[^,]s%d,%d,%[^,]s%[^,]s%[^,]s%[^,]s", &d[i].date, d[i].h_team, d[i].a_team, &d[i].gols_h_team, &d[i].gols_a_team, d[i].reason, d[i].city, d[i].country, d[i].neutral_field );
fprintf(output, "%li,%s,%s,%d,%d,%s,%s,%s,%s\n", d[i].date, d[i].h_team, d[i].a_team, d[i].gols_h_team, d[i].gols_a_team, d[i].reason, d[i].city, d[i].country, d[i].neutral_field );
}
}
int main(int argc, char *argv[])
{
FILE *input;
FILE *output;
char *string = "output.txt";
int size = 2;
open_input(argv, &input);
open_output(string, &output);
Data *d;
d = (Data*)malloc(size*sizeof(Data));
alloc_Data(d, size);
store(d, input, output, size);
free(d);
return 0;
}
OP's fscanf() format is messed up with an s after %[^,] and missing ,. #Gem Taylor
A better alternative is to read all lines using fgets(). (including the first)
// fscanf(input, "%li,%[^,]s%[^,]s%d,%d,%[^,]s%[^,]s%[^,]s%[^,]s",...d[i].neutral_field );
#define EXPECTED_MAX_BUFFER_SIZE 150
char buffer[EXPECTED_MAX_BUFFER_SIZE * 2]; // suggest using 2x expected max size
if (fgets(buffer, sizeof buffer, input)) {
Then parse the team results with sscanf(). Using a " %n" at the end is an easy way to test if the entire sscanf() succeeded with no extra junk. sscanf() deserve width limits and complex scans benefit with some defines to manage the specifiers. Notice no s after "%29[^,]"
int n = 0;
#define FDat "%li ,"
#define FScr "%d ,"
#define Ftxt " %29[^,],"
#define Fneu " %8[^,]"
sscanf(buffer, FDat Ftxt Ftxt FScr FScr Ftxt Ftxt Ftxt Fneu " %n",
&d[i].date, d[i].h_team, d[i].a_team,
&d[i].gols_h_team, &d[i].gols_a_team,
d[i].reason, d[i].city, d[i].country, d[i].neutral_field, &n);
if (n > 0 && buffer[n] == '\0') {
// success
fprintf(output, ...
} else {
// bad input
}
I am using three files, my reverse.c, file_utils.h, and file_utils.c however after compiling successfully and executing I'm receiving a Segmentation Fault (core dumped). I'm unsure where the problem is. Thank you for any input!
Reverse.c File:
#include "file_utils.h"
#include <stdio.h>
#include <sys/stat.h>
//reverse function
//calls two parameters for a pointer to the input file and a pointer to the output file
//outputs the input file into the output in reverse
int reverse(char* inputFile, char* outputFile) {
char* buffer;
read_file(inputFile, &buffer);
struct stat st;
stat(inputFile, &st);
int size = st.st_size;
write_file(outputFile, buffer, size);
}
int main(int argc, char** argv[]) {
char* input;
char* output;
if(argv[1] != NULL) {
input = argv[1];
}
else {
printf("No input file detected.");
}
if(argv[2] != NULL) {
output = argv[2];
}
else {
printf("No output file detected.");
}
reverse(input, output);
}
file_utils.h Header:
#include <stdio.h>
void setup(int argc, char** argv);
int read_file( char* filename, char **buffer);
int write_file( char* filename, char *buffer, int size);
file_utils.c file:
#include "file_utils.h"
#include "stdlib.h"
//read_file function has two parameters which call a pointer to the input filenmae and
//a pointer to the pointer where the buffer will be stored
//returns the pointer to the newBuffer
int read_file( char* filename, char **buffer ) {
FILE *file;
file = fopen(filename, "r");
char* newBuffer = buffer;
int c;
size_t n = 0;
if(file == NULL) {
fprintf( stderr, "No file found. ");
return -1;
}
// length of file code found on stackoverflow.com/questions/4823177/reading-a-file
// given by user 'Justin'
fseek(file, 0, SEEK_END);
long f_size = ftell(file);
fseek(file, 0, SEEK_SET);
buffer = malloc(f_size);
if(newBuffer == NULL || newBuffer < 0) {
fprintf(stderr, "Memory allocation error. ");
}
while((c = fgetc(file)) != EOF) {
newBuffer[n++] = (char) c;
}
newBuffer[n] = '\0';
return *newBuffer;
}
//write_file function has three parameters which consist of a pointer to the output file,
//the actual value of the buffer pointer, and the size of the buffer
int write_file( char* filename, char *buffer, int size) {
fwrite(buffer, sizeof(char), size, filename);
}
My code is not putting the text file data into line on the second pass of the while loop, and any subsequent pass. I'm sure it's a silly error but I cannot find it.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
FILE *fr;
char *line,*word,*vali;
ssize_t read;
int i=0,sum=0,len =0,flag=0;
const char delim[2]=" ";
int main(int argc, char* argv[])
{
line = (char *)malloc(sizeof(&len));
word = (char *)malloc(sizeof(&len));
vali = (char *)malloc(sizeof(&len));
fr = fopen(argv[1], "r");
if(fr==NULL)
{
exit(EXIT_FAILURE);
}
while ((read = getline(&line, &len, fr)) != -1)
{
printf("line is %s\n", line );
fscanf(fr,"%s%*[^\n]",word);
printf("%s ", word);
vali=strtok(line, delim);
while(vali != NULL)
{
sum=sum+atoi(vali);
vali = strtok(NULL, delim);
}
printf("%d\n", sum);
sum=0;
vali=" ";
len = strlen(line);
}
fclose(fr);
if (line)
free(line);
return 0;
}
If len is some integral type containing the desired length of the first line, then:
&len
Has type pointer-to-integer, and
sizeof(&len)
Returns the size of a pointer (8 bytes on most 64 bit systems) and
malloc(sizeof(&len))
Allocates only 8 bytes of memory (or whatever pointer size is on your system).
This is probably at least part of the issue.
I have to read in a file such as
apple
grape
banana
And store it into a string, but fgets only reads up to the newline and stops, so its only reading in apple.
How do I get around this? Or how can I store the three words all as separate strings?
char* readFile(const char *fileName)
{
FILE *inFile;
inFile=fopen(fileName, "r");
char **stringInFile;
stringInFile = malloc(sizeof(char*)*50);
char *data = fgets(stringInFile,50,inFile);
printf("%s", data);
fclose(inFile);
return data;
}
This is all in C btw.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char** readFile(const char *fileName);
int main(int argc, char **argv){
char **data = readFile(argv[1]);
int i;
for(i=0; data[i] ; ++i){
puts(data[i]);
free(data[i]);
}
free(data);
return 0;
}
char** readFile(const char *fileName){
FILE *inFile;
inFile=fopen(fileName, "r");
char **stringInFile;
stringInFile = calloc(50, sizeof(char*));
char line[256];
int i = 0;
while(fgets(line, sizeof(line), inFile)){
char *p = strchr(line, '\n');
if(p)
*p = 0;
if(i < 50 - 1)
stringInFile[i++] = strdup(line);
}
fclose(inFile);
return stringInFile;
}
fgets() is only reading one Line by each call, and sets the file courser to the next line. If you want to read the fully file, you have to iterate it. To check if you are at the end, you can check for the EOF flag with feof().
Resulting in, for me working:
char* readFile(const char *fileName)
{
FILE *inFile;
inFile=fopen(fileName, "r");
char **stringInFile;
stringInFile = malloc(sizeof(char*)*50);
while(!feof(inFile))
{
printf("%s", fgets(stringInFile,50,inFile));
}
fclose(inFile);
return stringInFile;
}
And, you don't need the variable data - fgets() first parameter is a character Array, where is it automatical stored(for example Apple in your programm).