How to write text containing newline given as command line arguments in C? - c

I want to create a text file with mulitple lines using system calls in C and populate it with the text provided as command line arguments.
This is what I wrote:
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#define MAX_SZ 1024
int main(int argc, char *argv[]) {
if (argc != 3) {
printf("Invalid Number of arguments\n");
printf("USAGE: ./a.out file_name \"msg\"\n");
} else {
int fd_creat, fd_open, fd_write;
char file_name[MAX_SZ];
char *msg = (char *)malloc(strlen(argv[2]) * sizeof(char));
strcpy(file_name, argv[1]);
fd_creat = creat(file_name, 0777);
if (fd_creat < 2) {
printf("ERROR: File could not be created\n");
} else {
fd_open = open(file_name, O_WRONLY);
strcpy(msg, argv[2]);
fd_write = write(fd_open, msg, strlen(msg));
close(fd_open);
}
}
return 0;
}
If I execute this program as:
./a.out test.txt "Foo\nBar"
It writes the whole thing into test.txt as it is. Basically, I want 'Foo' and 'Bar' in their separate lines.

There's two problems here:
The way you're handling arguments and failing to allocate enough memory for the data involved,
Interpreting escape sequences like \n correctly since the shell will give them to you as-is, raw.
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
// This moves overlapping strings from src -> dest but only
// if dest is before src
void cc_str_drag(char* dest, char* src) {
while (*dest) {
*dest = *src;
++dest;
++src;
}
}
// This interprets the \n sequence and can be extended to handle others, like
// \t, \\, or even \g.
void cc_interpret(char* str) {
for (;*str; ++str) {
// If this is a sequence start...
if (*str == '\\') {
// ...find out which one...
switch (str[1]) {
case 'n':
// Shift back...
cc_str_drag(str, &str[1]);
// ...and replace it.
*str = '\n';
break;
}
}
}
}
int main(int argc, char *argv[]) {
if (argc != 3) {
printf("Invalid Number of arguments\n");
// Remember argv[0] is the name of the program
printf("USAGE: %s file_name \"msg\"\n", argv[0]);
return -1;
}
// Since it's not the 1970s, use fopen() and FILE*
FILE* output = fopen(argv[1], "w");
if (!output) {
printf("ERROR: File could not be created\n");
return -2;
}
// Copying here to avoid tampering with argv
char* str = strdup(argv[2]);
// Replace any escape sequences
cc_interpret(str);
// Then just dump it directly into the file
fwrite(str, 1, strlen(str), output);
fclose(output);
return 0;
}
Note the tools used here:
strdup is a way quicker method of copying a C string than malloc(strlen(s)) and then copying it. That's asking for dreaded off-by-one errors.
FILE* performs much better because it's buffered. open() is used for low-level operations that can't be buffered. Know when to use which tool.
Don't be afraid to write functions that manipulate string contents. C strings are really important to understand, not fear.

Related

How to use fscanf to read a text file including many words and store them into a string array by index

The wordlist.txt is including like:
able
army
bird
boring
sing
song
And I want to use fscanf() to read this txt file line by line and store them into a string array by indexed every word like this:
src = [able army bird boring sing song]
where src[0]= "able", src[1] = "army" and so on. But my code only outputs src[0] = "a", src[1] = "b"... Could someone help me figure out what's going wrong in my code:
#include <stdio.h>
#include <string.h>
int main(int argc, char *argv[])
{
FILE *fp = fopen("wordlist.txt", "r");
if (fp == NULL)
{
printf("%s", "File open error");
return 0;
}
char src[1000];
for (int i = 0; i < sizeof(src); i++)
{
fscanf(fp, "%[^EOF]", &src[i]);
}
fclose(fp);
printf("%c", src[0]);
getchar();
return 0;
}
Pretty appreciated!
For example like this.
#include <stdio.h>
#include <string.h>
#include <errno.h>
#define MAX_ARRAY_SIZE 1000
#define MAX_STRING_SIZE 100
int main(int argc, char *argv[]) {
FILE *fp = fopen("wordlist.txt", "r");
if (fp == NULL) {
printf("File open error\n");
return 1;
}
char arr[MAX_ARRAY_SIZE][MAX_STRING_SIZE];
int index = 0;
while (1) {
int ret = fscanf(fp, "%s", arr[index]);
if (ret == EOF) break;
++index;
if (index == MAX_ARRAY_SIZE) break;
}
fclose(fp);
for (int i = 0; i < index; ++i) {
printf("%s\n", arr[i]);
}
getchar();
return 0;
}
Some notes:
If there is an error, it is better to return 1 and not 0, for 0 means successful execution.
For a char array, you use a pointer. For a string array, you use a double pointer. A bit tricky to get used to them, but they are handy.
Also, a check of the return value of the fscanf would be great.
For fixed size arrays, it is useful to define the sizes using #define so that it is easier to change later if you use it multiple times in the code.
It's reading file one character at a time, Which itself is 4 in size like we see sizeof('a') in word able. Same goes for 'b' and so on. So one approach you can use is to keep checking when there is a space or newline character so that we can save the data before these two things as a word and then combine these small arrays by adding spaces in between and concatenating them to get a single array.

Proper manipulation of file name string in C

I'm writing a program to convert one specific file format let's say file.foo -> file.bar. It's the first time I'm using C for file I/O (I've always used php in the past). I would like an opinion about the first part of the program: create the output file name from argv[1].
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main (int argc, char **argv) {
if(argc == 1) {
printf("Need a .foo file");
return 0;
}
char *ext = strrchr(argv[1], '.');
if(!ext || strcmp(ext, ".foo")) {
printf("Not a .foo file");
return 0;
}
char *basename = (strrchr(argv[1], '\\'));
if(!basename) {
basename = argv[1];
} else {
basename++;
}
char *out_name = (char*) malloc((strlen(basename)+1)*sizeof(char));
sprintf(out_name, "%.*s.bar", (int) (ext - basename), basename);
/* CODE */
}
My concerns are:
preserving argv[1]
work with absolute and relative path (C:\User\path\file.foo, .\file.foo, file.foo)
It works fine (from what I can tell). Have I missed something or perhaps have I used redundant variables? I'm asking purely because I think the situation it's so general that there should be a consolidated way to go.

Get the length of each line in file with C and write in output file

I am a biology student and I am trying to learn perl, python and C and also use the scripts in my work. So, I have a file as follows:
>sequence1
ATCGATCGATCG
>sequence2
AAAATTTT
>sequence3
CCCCGGGG
The output should look like this, that is the name of each sequence and the count of characters in each line and printing the total number of sequences in the end of the file.
sequence1 12
sequence2 8
sequence3 8
Total number of sequences = 3
I could make the perl and python scripts work, this is the python script as an example:
#!/usr/bin/python
import sys
my_file = open(sys.argv[1]) #open the file
my_output = open(sys.argv[2], "w") #open output file
total_sequence_counts = 0
for line in my_file:
if line.startswith(">"):
sequence_name = line.rstrip('\n').replace(">","")
total_sequence_counts += 1
continue
dna_length = len(line.rstrip('\n'))
my_output.write(sequence_name + " " + str(dna_length) + '\n')
my_output.write("Total number of sequences = " + str(total_sequence_counts) + '\n')
Now, I want to write the same script in C, this is what I have achieved so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[])
{
input = FILE *fopen(const char *filename, "r");
output = FILE *fopen(const char *filename, "w");
double total_sequence_counts = 0;
char sequence_name[];
char line [4095]; // set a temporary line length
char buffer = (char *) malloc (sizeof(line) +1); // allocate some memory
while (fgets(line, sizeof(line), filename) != NULL) { // read until new line character is not found in line
buffer = realloc(*buffer, strlen(line) + strlen(buffer) + 1); // realloc buffer to adjust buffer size
if (buffer == NULL) { // print error message if memory allocation fails
printf("\n Memory error");
return 0;
}
if (line[0] == ">") {
sequence_name = strcpy(sequence_name, &line[1]);
total_sequence_counts += 1
}
else {
double length = strlen(line);
fprintf(output, "%s \t %ld", sequence_name, length);
}
fprintf(output, "%s \t %ld", "Total number of sequences = ", total_sequence_counts);
}
int fclose(FILE *input); // when you are done working with a file, you should close it using this function.
return 0;
int fclose(FILE *output);
return 0;
}
But this code, of course is full of mistakes, my problem is that despite studying a lot, I still can't properly understand and use the memory allocation and pointers so I know I especially have mistakes in that part. It would be great if you could comment on my code and see how it can turn into a script that actually work. By the way, in my actual data, the length of each line is not defined so I need to use malloc and realloc for that purpose.
For a simple program like this, where you look at short lines one at a time, you shouldn't worry about dynamic memory allocation. It is probably good enough to use local buffers of a reasonable size.
Another thing is that C isn't particularly suited for quick-and-dirty string processing. For example, there isn't a strstrip function in the standard library. You usually end up implementing such behaviour yourself.
An example implementation looks like this:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAXLEN 80 /* Maximum line length, including null terminator */
int main(int argc, char *argv[])
{
FILE *in;
FILE *out;
char line[MAXLEN]; /* Current line buffer */
char ref[MAXLEN] = ""; /* Sequence reference buffer */
int nseq = 0; /* Sequence counter */
if (argc != 3) {
fprintf(stderr, "Usage: %s infile outfile\n", argv[0]);
exit(1);
}
in = fopen(argv[1], "r");
if (in == NULL) {
fprintf(stderr, "Couldn't open %s.\n", argv[1]);
exit(1);
}
out = fopen(argv[2], "w");
if (in == NULL) {
fprintf(stderr, "Couldn't open %s for writing.\n", argv[2]);
exit(1);
}
while (fgets(line, sizeof(line), in)) {
int len = strlen(line);
/* Strip whitespace from end */
while (len > 0 && isspace(line[len - 1])) len--;
line[len] = '\0';
if (line[0] == '>') {
/* First char is '>': copy from second char in line */
strcpy(ref, line + 1);
} else {
/* Other lines are sequences */
fprintf(out, "%s: %d\n", ref, len);
nseq++;
}
}
fprintf(out, "Total number of sequences. %d\n", nseq);
fclose(in);
fclose(out);
return 0;
}
A lot of code is about enforcing arguments and opening and closing files. (You could cut out a lot of code if you used stdin and stdout with file redirections.)
The core is the big while loop. Things to note:
fgets returns NULL on error or when the end of file is reached.
The first lines determine the length of the line and then remove white-space from the end.
It is not enough to decrement length, at the end the stripped string must be terminated with the null character '\0'
When you check the first character in the line, you should check against a char, not a string. In C, single and double quotes are not interchangeable. ">" is a string literal of two characters, '>' and the terminating '\0'.
When dealing with countable entities like chars in a string, use integer types, not floating-point numbers. (I've used (signed) int here, but because there can't be a negative number of chars in a line, it might have been better to have used an unsigned type.)
The notation line + 1 is equivalent to &line[1].
The code I've shown doesn't check that there is always one reference per sequence. I'll leave this as exercide to the reader.
For a beginner, this can be quite a lot to keep track of. For small text-processing tasks like yours, Python and Perl are definitely better suited.
Edit: The solution above won't work for long sequences; it is restricted to MAXLEN characters. But you don't need dynamic allocation if you only need the length, not the contents of the sequences.
Here's an updated version that doesn't read lines, but read characters instead. In '>' context, it stored the reference. Otherwise it just keeps a count:
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h> /* for isspace() */
#define MAXLEN 80 /* Maximum line length, including null terminator */
int main(int argc, char *argv[])
{
FILE *in;
FILE *out;
int nseq = 0; /* Sequence counter */
char ref[MAXLEN]; /* Reference name */
in = fopen(argv[1], "r");
out = fopen(argv[2], "w");
/* Snip: Argument and file checking as above */
while (1) {
int c = getc(in);
if (c == EOF) break;
if (c == '>') {
int n = 0;
c = fgetc(in);
while (c != EOF && c != '\n') {
if (n < sizeof(ref) - 1) ref[n++] = c;
c = fgetc(in);
}
ref[n] = '\0';
} else {
int len = 0;
int n = 0;
while (c != EOF && c != '\n') {
n++;
if (!isspace(c)) len = n;
c = fgetc(in);
}
fprintf(out, "%s: %d\n", ref, len);
nseq++;
}
}
fprintf(out, "Total number of sequences. %d\n", nseq);
fclose(in);
fclose(out);
return 0;
}
Notes:
fgetc reads a single byte from a file and returns this byte or EOF when the file has ended. In this implementation, that's the only reading function used.
Storing a reference string is implemented via fgetc here too. You could probably use fgets after skipping the initial angle bracket, too.
The counting just reads bytes without storing them. n is the total count, len is the count up to the last non-space. (Your lines probably consist only of ACGT without any trailing space, so you could skip the test for space and use n instead of len.)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[]){
FILE *my_file = fopen(argv[1], "r");
FILE *my_output = fopen(argv[2], "w");
int total_sequence_coutns = 0;
char *sequence_name;
int dna_length;
char *line = NULL;
size_t size = 0;
while(-1 != getline(&line, &size, my_file)){
if(line[0] == '>'){
sequence_name = strdup(strtok(line, ">\n"));
total_sequence_coutns +=1;
continue;
}
dna_length = strlen(strtok(line, "\n"));
fprintf(my_output, "%s %d\n", sequence_name, dna_length);
free(sequence_name);
}
fprintf(my_output, "Total number of sequences = %d\n", total_sequence_coutns);
fclose(my_file);
fclose(my_output);
free(line);
return (0);
}

C open() returns -1 for a path string

when i use open() as
int ff=open("/home/user/desktop/bla/test",O_RDONLY);
it works fine.
but when i use a string as the (same path stored in a string) path it doesn't work.
int ff=open(string,O_RDONLY);
why is this?
this is my whole code. i'm confused. i know it should work. but i doesn't. i can't find the bug.
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <string.h>
void bla(char *);
int main(void)
{
// int i;
FILE * fp=fopen("path","r");
char line[256];
while( fgets(line,255,fp)!=NULL){
bla(line);
}
return 0;
}
void bla(char * line){
int status;
printf("%s",line);
pid_t pid=fork();
char string[256];
if(pid==0){
pid=wait(&status);
int ff=open(line,O_RDONLY);
if(ff<0){
printf("\topen error!\n\n");
return;}
int ret=read(ff,string,255);
if(ret<0){
printf("read error!\n");
return;}
printf("%s",string);
close(ff);
exit(0);
}
if (pid>0){
return;
}
}
in bla function, if i replace 'line' with the path, it works. i used a printf to make sure. path is the same (it seems the same).
I call shenanigans :-)
Obviously, if string were exactly the same value as your string literal, and all others things remained the same, it would work fine.
So, if all other things are the same, string is obviously not set to the value you think it is, to wit "/home/user/desktop/bla/test".
My advice would be to print it out (along with an error string) to be sure, something like:
fprintf (stderr, "DEBUG: string is [%s]\n", string);
int ff = open (string, O_RDONLY);
if (ff < 0)
perror ("Could not open file");
And, now that you've posted the code, your problem is obvious: fgets() does not remove the trailing newline so, were you to add the code I proposed above, you would see:
DEBUG: string is [/home/user/desktop/bla/test
]
and deduce the problem.
A quick way to fix this is to remove the newline yourself, with something like:
while (fgets (line, 255, fp) != NULL) {
size_t ln = strlen (line); // Add these
if ((ln > 0) && (line[ln-1] == '\n')) // three
line[ln-1] = '\0'; // lines.
bla (line);
}

Save data into a file: where the adress of the file is given by the user

I ran a simulation for some data y1, y2,..yn and generate vectors w, mu. At each simulation these results are stored into a file, let us say (normally w and mu are very long vectors 10,000 entries)
/home/carlos/Documents/Results/w.txt
/home/carlos/Documents/Results/mu.txt
But if I want to run my algorithm with other data set, and do not want to lose the previous results, I have to go directly into my C code and change (or move the w.txt, mu.txt to other file)
/home/carlos/Documents/Results/OtherData/w.txt
/home/carlos/Documents/Results/OtherData/mu.txt
I do not want to go every time into my C code to change the address(or move again and again w.txt, mu.txt), I would like to just create a new folder with a name: OtherData and store the data there just giving the address
/home/carlos/Documents/Results/OtherData/
as an input for the code
I did a very simplified example but it does not work, could somebody give me a hand?
#include <stdio.h>
#include <string.h>
void main(char *dir){
char dir_happy[100] = *dir, dir_sad[100]=*dir;
FILE *ffile_happy, *ffile_sad;
strcat(dir_happy, "/happy.txt");
strcat(dir_sad, "/sad.txt");
ffile_happy = fopen("dir_happy.txt", "w");
ffile_sad = fopen("dir_sad.txt", "w");
fprintf(ffile_happy, "Hello!, happy world\n");
fprintf(ffile_sad, "Hello!, sad world\n");
fclose(ffile_happy);
fclose(ffile_sad);
}
You have the arguments to main() wrong. The proper prototype is:
int main(int argc, char *argv[]);
Where argc is the number of arguments given, and argv is a vector holding each argument. The first argument (in argv[0]) is generally the program's name.
Untested. Have fun.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_FILENAME_LENGTH 100
#define DEFAULT_DIR "."
#define HAPPY_NAME "/happy.txt"
#define SAD_NAME "/sad.txt"
int main(int argc, char **argv) {
char name1[MAX_FILENAME_LENGTH+1], name2[MAX_FILENAME_LENGTH+1];
size_t dirlen;
char *dir = DEFAULT_DIR;
FILE *ffile_happy, *ffile_sad;
if (argc == 2) {
dir = argv[1];
}
dirlen = strlen(dir);
if (len + strlen(HAPPY_NAME) > MAX_FILENAME_LENGTH) {
fprintf(stderr, "Directory name too long. Program aborted.\n");
exit(EXIT_FAILURE);
}
if (len + strlen(SAD_NAME) > MAX_FILENAME_LENGTH) {
fprintf(stderr, "Directory name too long. Program aborted.\n");
exit(EXIT_FAILURE);
}
strcpy(name1, dir); strcat(name1, HAPPY_NAME);
strcpy(name2, dir); strcat(name2, SAD_NAME);
ffile_happy = fopen(name1, "w");
if (ffile_happy == NULL) {
fprintf(stderr, "Unable to open file \"%s\" for writing. Program aborted.\n", name1);
exit(EXIT_FAILURE);
}
ffile_sad = fopen(name2, "w");
if (ffile_sad == NULL) {
fprintf(stderr, "Unable to open file \"%s\" for writing. Program aborted.\n", name2);
fclose(ffile_happy);
exit(EXIT_FAILURE);
}
/* use files */
fclose(ffile_happy);
fclose(ffile_sad);
return 0;
}
void main(char *dir) is the problem.
Main takes 2 args int/void main(int argc, char *argv[])
argc is the number of arguments to the executable.
argv[0] is the filename of the executable.
argv[1..n] are the arguments passed (normally space separated, with quotes allowed)
So /a.out Hello "Look at me" would parse as
argv[0] => './a.out'
argv[1] => 'Hello'
argv[2] => 'Look at me'

Resources