I'm really new at C programing, so I need to read a big file, split it every point, and write in a new file what I got when splitting, so I have to write several files, the problem is when I'm naming the new files.
I've been working in this proyect like for a week and I can't fix this issue.
Here the code I have so far:
#include <stdio.h>
#include <string.h>
char *myRead(char file_name[]){
char *output;
long size;
FILE *myfile = fopen(file_name,"rb");
fseek(myfile, 0, SEEK_END);
size = ftell(myfile);
rewind(myfile);
output = (char*) malloc(sizeof(char) * size);
fread(output,1,size,myfile);
fclose(myfile);
return output;
}
void myWrite(char content[], int i){
FILE *myfile;
myfile = fopen(i,"w");
fprintf(myfile,"%s",content);
fclose(myfile);
}
void split(char *content){
int word_length = strlen(content);
int i = 0;
char *output = strtok (content,".");
while (output != NULL){
myWrite(output,i);
printf("%s\n", output);
output = strtok (NULL, ".");
i++;
}
}
int main(){
char file_name[] = "hourglass.txt";
char *content = myRead(file_name);
split(content);
return 0;
}
What I want to know it's how can I do several files with just a number for the name?
Change
myfile = fopen(i,"w");
to
char file_name[100];
sprintf(filename, "%d", i);
myfile = fopen(file_name, "w");
That should fix it for you
C's fopen has signature:
FILE* fopen(const char *filename, const char *mode)
when you call with i in myWrite you are telling it a string is located at that address, where likely there is garbage. If this isn't homework explain and I can elaborate but in case it is I'll just point you to itoa.
Related
Description:
I have created a small program that stores the name and checksum of a file in a struct, for each file in a directory. When output is written to stdout with printf, everything seems fine, but if we write to a file with either fputs or fprintf, values get overwritten, perhaps because of some buffer overflow?
Output from main with print.
Name: 2.txt. Checksum: fc769d448ed4e08bd855927bad2c8e43efdf5315a6daa9f28577758786d52eaf
Name: 1.txt. Checksum: 2d46cffd0302c5537ddb4952a9cca7d66060dafecd56fe3a7fe8e5e5cabbbbf9
Name: 3.txt. Checksum: 37bb2e5563e94eee68fac6b07501c44f018599482e897a626a94dd88053b4b7e
However, if we print the values of checksumMaps[0] to a file,
the value checksumMaps[0].filename gets overwritten (with the last 2 bytes of the checksum string) as seen by:
FILE *fp = fopen("mychecksums.txt", "w");
char formatted_bytes[32*2+1];
char *filename = checksumMaps[0].filename;
format_bytes(formatted_bytes, checksumMaps[0].checksum);
fputs(filename, fp);
fputs(formatted_bytes, fp);
// We print the value of `filename` again in order to see that it has been overwritten.
printf("%s \n", filename);
fclose(fp);
The program writes aftxt to stdout instead of 2.txt.
Using gdb, I can see that the value of filename changes from 2.txt to aftxt after the line fputs(formatted_bytes, fp);. What could be the reason for this?
Minimal Reproducible Example
ArchiveFile.h
typedef struct ArchiveFile{
char *uuid;
char *checksum;
char *relative_path;
int is_binary;
} ArchiveFile;
typedef struct file_content{
unsigned char* bytes;
unsigned long file_size;
} file_content;
void set_uuid(ArchiveFile *file, char* uuid);
char* get_absolute_path(ArchiveFile *file, char* root);
char* get_file_text(ArchiveFile *file, char* root);
void get_bytes(ArchiveFile *file, char* root, unsigned char *buffer, size_t fsize);
long get_file_size(ArchiveFile *file, char *root);
ArchiveFile.c
#include <sys/stat.h>
#include <stdlib.h>
#include <stdio.h>
#include "ArchiveFile.h"
#include <string.h>
void set_uuid(ArchiveFile* file, char* uuid){
file->uuid = uuid;
}
char* get_absolute_path(ArchiveFile *file, char* root){
/* Allocate space according to the relative path +
the root path + null terminating byte.*/
char* absolute_path = malloc(strlen(file->relative_path) + strlen(root) + 1);
// Add the root path.
strcpy(absolute_path, root);
// Concatonate the root with the rest of the path.
strcat(absolute_path, file->relative_path);
return absolute_path;
}
char* get_file_text(ArchiveFile *file, char* root){
char* absolute_path = get_absolute_path(file, root);
FILE *fp = fopen(absolute_path, "r");
if(fp == NULL)
printf("Could not open file %s \n", absolute_path);
// Platform independent way of getting the file size in bytes.
fseek(fp, 0, SEEK_END);
long fsize = ftell(fp);
fseek(fp, 0, SEEK_SET); /* same as rewind(f); */
char *buffer = malloc(fsize);
if(fp){
fread(buffer, sizeof(char), fsize, fp);
}
fclose(fp);
free(absolute_path);
return buffer;
}
void print_bytes2(unsigned char* md, size_t size){
for (size_t i = 0; i < size; i++) {
printf("%02x ", md[i]);
}
printf("\n");
}
void get_bytes(ArchiveFile *file, char *root, unsigned char *buffer, size_t fsize){
char* absolute_path = get_absolute_path(file, root);
FILE *fp = fopen(absolute_path, "rb");
if(fp){
fread(buffer, 1, fsize, fp);
}
free(absolute_path);
fclose(fp);
}
long get_file_size(ArchiveFile *file, char *root){
char* filepath = get_absolute_path(file, root);
FILE *fp = fopen(filepath, "rb");
fseek(fp, 0, SEEK_END);
long fsize = ftell(fp);
fseek(fp, 0, SEEK_SET); /* same as rewind(f); */
free(filepath);
fclose(fp);
return fsize;
}
checksum/checksum.h
// Used to store information about filename and checksum.
typedef struct ChecksumMap{
char* filename;
unsigned char checksum [32];
} ChecksumMap;
int calculate_checksum(void* input, unsigned long length, unsigned char* md);
checksum/checksum.h
#include <stdio.h>
#include <openssl/sha.h>
#include "checksum.h"
int calculate_checksum(void* input, unsigned long length, unsigned char* md){
SHA256_CTX context;
if(!SHA256_Init(&context))
return 0;
if(!SHA256_Update(&context, (unsigned char*)input, length))
return 0;
if(!SHA256_Final(md, &context))
return 0;
return 1;
}
main.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
#include <sys/types.h>
#include "ArchiveFile.h"
#include "checksum/checksum.h"
void format_bytes(char* buffer, unsigned char* md){
for (int i = 0; i < 32; i++) {
sprintf(&buffer[i*2], "%02x", md[i]);
}
buffer[32*2] = '\0';
}
void *listdir(char *name, int count, ChecksumMap *checksumMaps)
{
DIR *dir;
struct dirent *direntry;
if (!(dir = opendir(name)))
return NULL;
while ((direntry = readdir(dir)) != NULL) {
// If we reach a directory (that is not . or ..) then recursive step.
if (direntry->d_type == DT_DIR) {
char path[1024];
if (strcmp(direntry->d_name, ".") == 0 || strcmp(direntry->d_name, "..") == 0)
continue;
snprintf(path, sizeof(path), "%s/%s", name, direntry->d_name);
listdir(path, count, checksumMaps);
} else {
unsigned char md[32];
ArchiveFile file;
file.relative_path = direntry->d_name;
// Get the full path of the file:
char parent_name[strlen(name)+1];
memset(&parent_name[0], 0, sizeof(parent_name));
strcat(parent_name, name);
strcat(parent_name, "/");
size_t fsize = get_file_size(&file, parent_name);
unsigned char *bytes = malloc(sizeof(char) * fsize);
get_bytes(&file, parent_name, bytes, fsize);
calculate_checksum((void*) bytes, fsize, md);
ChecksumMap checksumMap = {.filename=file.relative_path};
memcpy(checksumMap.checksum, md,
sizeof(checksumMap.checksum));
free(bytes);
}
}
closedir(dir);
return NULL;
}
int main(int argc, char const *argv[]) {
FILE *fp = fopen("mychecksums.txt", "w");
char formatted_bytes[32*2+1];
char *filename = checksumMaps[0].filename;
format_bytes(formatted_bytes, checksumMaps[0].checksum);
fputs(filename, fp);
fputs(formatted_bytes, fp);
// We print the value of `filename` again in order to see that it has been overwritten.
printf("%s \n", filename);
fclose(fp);
}
Compile with gcc:
gcc -Wall -Wextra main.c ArchiveFile.c checksum/checksum.c -lcrypto
The program writes aftxt to stdout instead of 2.txt. Using gdb, I can see that the value of filename changes from 2.txt to aftxt after the line fputs(formatted_bytes, fp);. What could be the reason for this?
Hard to say, because we're in the domain of UB (undefined behavior). But there are two obvious candidates here.
formatted_bytes is not properly terminated, causing fputs to read past the array, invoking UB.
fp is not a valid stream. The reason could be that it's not initialized, or changed, or the stream is closed or something.
Enable -Wall -Wextra -fsanitize=address. You could also try -fsanitize=undefined.
Check all return values. malloc, fopen and fputs returns a value that can be used for error checking.
Replace formatted_bytes with a hardcoded string that have the value you think it has.
Learn how to create a Minimal, Reproducible Example and how to debug small c programs. It's a guide I wrote a while ago.
Update
It seems that there was some different problems with the code.
First thing to notice is file.relative_path = direntry->d_name;, however the value that direntry points to changes in each iteration, thus the value file.relative_path points to, also changes. Furthermore, the size of the string stored in file.relative_path has never been specified, which would be a problem, if we use strcpy.
The solution is to specify a size for file.relative_path and use strcpy to copy the value of direntry->d_name. Also, no need for the checksumMap struct, since ArchiveFile already can store the same information (again, specify a size for the checksum).
Thing to keep in mind when you work with strings, buffers, arrays in C:
Remember that strings in C are based on char arrays, themselves based on a pointer to the first element. Assigning the value of one string to another might return in unexpected behavior when you actually want to copy the value of the string, not the address to the first element.
One bug here:
char parent_name[strlen(name)+1];
memset(&parent_name[0], 0, sizeof(parent_name)); // could have been parent_name[0]='\0'; instead
strcat(parent_name, name); // Now the parent_name buffer is full and null terminated.
strcat(parent_name, "/"); // this overwrites the null terminator and writes a new one out-of-bounds
You should have done something like this:
size_t length = strlen(name);
char parent_name[length+1+1];
memcpy(parent_name, name, length); // copies characters only (fast) but not the null term
parent_name[length] = '/'; // append this single character 1 symbol past "name" string
parent_name[length+1] = '\0'; // add manual null termination
I have an archive results.csv and I need to read the first line of this archive and print it out on output.txt. Somehow it's printing random characters after everything and I couldn't figure out what is wrong.
Command: a.c results.csv
First line:
date,home_team,away_team,home_score,away_score,tournament,city,country,neutral
output.txt: date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,(!£,(!£,(!£,(!£,(!£,#,£,(!£,(!£
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
typedef struct
{
char *line1;
char *line1a;
char *line1b;
char *team1;
char *team2;
char *reason;
char *city;
char *country;
char *neutral_field;
}data;
void open_input(char *argv[], FILE **input)
{
if((*input=fopen(argv[1], "r")) == NULL)
{
printf("%s not found\n", argv[1]);
exit(1);
}
}
void open_output(char *string, FILE **output)
{
if((*output=fopen(string, "w")) == NULL)
{
printf("%s not found\n", string);
exit(1);
}
}
void alloc_data(data *d, int size)
{
d->line1 = (char*)malloc(4*sizeof(char));
d->team1 = (char*)malloc(9*sizeof(char));
d->team2 = (char*)malloc(9*sizeof(char));
d->line1a = (char*)malloc(10*sizeof(char));
d->line1b = (char*)malloc(10*sizeof(char));
d->reason = (char*)malloc(10*sizeof(char));
d->city = (char*)malloc(4*sizeof(char));
d->country = (char*)malloc(7*sizeof(char));
d->neutral_field = (char*)malloc(7*sizeof(char));
}
void store(data *d, FILE *input, FILE **output)
{
fscanf(input, "%s,%s,%s,%s,%s,%s,%s,%s,%s", d[0].line1, d[0].team1, d[0].team2, d[0].line1a, d[0].line1b, d[0].reason, d[0].city, d[0].country, d[0].neutral_field );
fprintf(*output, "%s,%s,%s,%s,%s,%s,%s,%s,%s\n", d[0].line1, d[0].team1, d[0].team2, d[0].line1a, d[0].line1b, d[0].reason, d[0].city, d[0].country, d[0].neutral_field );
}
int main(int argc, char *argv[])
{
FILE *input;
FILE *output;
char *string = "output.txt";
int size = 1000;
open_input(argv, &input);
open_output(string, &output);
data *d;
d = (data*)malloc(size*sizeof(data));
alloc_data(d, size);
store(d, input, &output);
free(d);
return 0;
}
fscanf(input, "%s,%s,%s,%s,%s,%s,%s,%s,%s", d[0].line1, d[0].team1,...
The above code tries to read the whole line in to d[0].line1 which causes buffer overflow. team1 and the rest will contain uninitialized data.
You have to change fscanf as follows:
fscanf(input, "%3[^ ,\n\t],%9[^ ,\n\t],...
Where 3 is 4 - 1, and 4 is the size of d[0].line1
Alternatively you can use strtok
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void store(FILE *input, FILE *output)
{
char buf[500];
while(fgets(buf, sizeof(buf), input))
{
//strip end-of-line from `buf`
if(strlen(buf))
if(buf[strlen(buf) - 1] == '\n')
buf[strlen(buf) - 1] = 0;
//tokenize with strtok
char *token = strtok(buf, ",");
while(token)
{
fprintf(output, "%s", token);
token = strtok(NULL, ",");
}
fprintf(output, "\n");
}
}
int main(int argc, char *argv[])
{
FILE *input = fopen("input.txt", "r");
FILE *output = fopen("output.txt", "w");
store(input, output);
return 0;
}
With above code you don't need an additional structure.
If you do use a structure for data, you have to be more careful. It seems you are trying to create an array of 1000 data, but the following only creates one oversized pointer, not an array of data
int size = 1000;
data *d;
d = (data*)malloc(size*sizeof(data));
alloc_data(d, size);
Additionally, for each malloc there should be a corresponding free.
Your buffers aren't big enough to hold the terminating NUL byte. scanf stores that NUL byte (overrunning the buffer), but then the object that really owns that byte may overwrite it, so when printf looks for the NUL it doesn't find it until much later in memory.
The buffer overruns are a bigger problem than what you've seen, who knows what objects those NUL bytes you didn't make space for are smashing? And what happens when you read a data file with slightly different header spelling? Suddenly your hard-coded allocations sizes will be even more wrong than they are already.
I'm trying a very simple thing: read a minimal text file and compress it with the compress() utility from zlib. I think I've done everything fine, I allocate filesize * 10 for the output, it should be more that enough, but I keep getting -5 (Z_BUF_ERROR) as result of the operation.
Any help?
#include <stdio.h>
#include <stdlib.h>
#include "zlib.h"
#define FILE_TO_OPEN "text.txt"
static char* readcontent(const char *filename, int* size)
{
char* fcontent = NULL;
int fsize = 0;
FILE* fp = fopen(filename, "r");
if(fp) {
fseek(fp, 0, SEEK_END);
fsize = ftell(fp);
rewind(fp);
fcontent = (char*) malloc(sizeof(char) * fsize);
fread(fcontent, 1, fsize, fp);
fclose(fp);
}
*size = fsize;
return fcontent;
}
int main(int argc, char const *argv[])
{
int input_size;
char* content_of_file = readcontent(FILE_TO_OPEN, &input_size);
printf("%d\n", input_size);
uLongf compressed_data_size;
char* compressed_data = malloc(sizeof(char) * (input_size * 10));
int result = compress((Bytef*) compressed_data, (uLongf*)&compressed_data_size, (const Bytef*)content_of_file, (uLongf)input_size);
printf("%d\n", result);
return 0;
}
Use fopen(filename, "rb"). If you are on Windows that b is important to avoid corruption of binary data.
Use compressBound() in zlib instead of input_size * 10 and set compressed_data_size before calling compress(). (You do not need to and should not write your own compressBound().)
Try
uLongf compressed_data_size = compressBound(input_size);
compressBound should be available in zlib.
Also you are better of probably using rb in fopen like I mentioned in my comment before.
I have to read in a file such as
apple
grape
banana
And store it into a string, but fgets only reads up to the newline and stops, so its only reading in apple.
How do I get around this? Or how can I store the three words all as separate strings?
char* readFile(const char *fileName)
{
FILE *inFile;
inFile=fopen(fileName, "r");
char **stringInFile;
stringInFile = malloc(sizeof(char*)*50);
char *data = fgets(stringInFile,50,inFile);
printf("%s", data);
fclose(inFile);
return data;
}
This is all in C btw.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char** readFile(const char *fileName);
int main(int argc, char **argv){
char **data = readFile(argv[1]);
int i;
for(i=0; data[i] ; ++i){
puts(data[i]);
free(data[i]);
}
free(data);
return 0;
}
char** readFile(const char *fileName){
FILE *inFile;
inFile=fopen(fileName, "r");
char **stringInFile;
stringInFile = calloc(50, sizeof(char*));
char line[256];
int i = 0;
while(fgets(line, sizeof(line), inFile)){
char *p = strchr(line, '\n');
if(p)
*p = 0;
if(i < 50 - 1)
stringInFile[i++] = strdup(line);
}
fclose(inFile);
return stringInFile;
}
fgets() is only reading one Line by each call, and sets the file courser to the next line. If you want to read the fully file, you have to iterate it. To check if you are at the end, you can check for the EOF flag with feof().
Resulting in, for me working:
char* readFile(const char *fileName)
{
FILE *inFile;
inFile=fopen(fileName, "r");
char **stringInFile;
stringInFile = malloc(sizeof(char*)*50);
while(!feof(inFile))
{
printf("%s", fgets(stringInFile,50,inFile));
}
fclose(inFile);
return stringInFile;
}
And, you don't need the variable data - fgets() first parameter is a character Array, where is it automatical stored(for example Apple in your programm).
I'm quite new to C programming and have just begun studying files. I'm wondering whether it is possible to read a file line by line (including spaces in each line) into an array of size equal to the number of lines in the file. I really have no idea where to start or whether this is even possible so any guidance at all would be much appreciated.
Example
A text file in the form of:
Computer Programming
Software Engineering
Computer Architecture
to be written into array such that:
char array[4];
array[0] = "Computer Programming";
array[1] = "Software Engineering";
array[2] = "Computer Architecture";
All I have so far is:
int main()
{
char array[50];
bool answer;
FILE *classes;
classes = fopen("classnames.txt", "r");
if(classes == NULL){
printf("\n ************* ERROR *************\n");
printf("\n \"classnames.txt\" cannot be opened.\n");
printf("\n PROGRAM TERMINATED\n");
exit(EXIT_FAILURE);
}
And next I would like to write each class name into each element of the array.
Yes, you just have to declare array as char** and dynamically allocate as you read each line. E.g.
int MAX_NUM_LINES = 1000;
int MAX_LINE_LEN = 256;
char** array;
malloc(array, MAX_NUM_LINES*sizeof(char*));
fp = fopen(...);
int line_ct = 0;
char line[MAX_LINE_LEN];
while ( fgets(line, MAX_LINE_LEN, fp) != NULL )
{
int len = strlen(line);
malloc(array[line_ct], len * sizeof(char));
strcpy(array[line_ct], line);
line_ct++;
}
I have not actually tried to compile this code, but something like this will work. You can also replace MAX_NUM_LINES with the actual value by doing a quick runthrough first and counting the lines--that would be preferable probably.
This is an example of a possible approach
#include <stdio.h>
#include <string.h>
int main()
{
char array[100][100];
char line[100];
size_t arraySize;
size_t count;
FILE *file;
const char *filepath;
filepath = "<put the file path here>";
file = fopen(filepath, "r");
if (file == NULL)
{
perror("fopen()");
return -1;
}
count = 0;
arraySize = sizeof(array) / sizeof(array[0]);
while ((fgets(line, sizeof(line), file) != NULL) && (count < arraySize))
{
size_t length;
length = strlen(line);
if (line[length] == '\0')
line[--length] = '\0';
memcpy(array[count++], line, 1 + length);
}
fclose(file);
return 0;
}