C program to extract different substrings from array of strings - c

I want to extract different substrings from a file with array of strings. My file resembles this.
abcdxxx
efghijkyyy
lmzzz
ncdslanclsppp
kdfmsqqq
cbskdnsrrr
From the above file I want to extract xxx, yyy, zzz, ppp, qqq, rrr (basically the last 3 characters of each line) and store them in an array. I referred to this link, How to extract a substring from a string in C?, but it did not seem feasible because the content of my file is dynamic and might change for the next execution. Can someone give a brief explanation of how to do this?
Here is my approach
FILE* fp1 = fopen("test.txt","r");
if(fp1 == NULL)
{
printf("Failed to open file\n");
return 1;
}
char array[100];
while(fscanf(fp1,"%[^\n]",array)!=NULL);
for(i=1;i<=6;i++)
{
array[i] += 4;
}

the content in my file is dynamic and might change for next execution
Then you need realloc or a linked list:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Read "text.txt" line by line, keep the last three characters of every
 * non-empty line in a dynamically grown array of strings, then print them.
 *
 * Lines shorter than three characters are stored whole. Lines longer than
 * the 99-character read buffer are seen by fgets() as several chunks.
 * Returns EXIT_FAILURE if memory runs out, 0 otherwise.
 */
int main(void)
{
    enum { SUFFIX_LEN = 3 };
    FILE *f;
    char **arr = NULL;
    char s[100];
    size_t i, n = 0;
    int status = 0;

    f = fopen("text.txt", "r");
    if (f == NULL) {
        perror("fopen");
        exit(EXIT_FAILURE);
    }
    while (fgets(s, sizeof s, f) != NULL) {
        /* Measure the text without its line terminator, so a final line
           that lacks '\n' is handled exactly like the others. */
        size_t len = strcspn(s, "\r\n");
        size_t take = len < (size_t)SUFFIX_LEN ? len : (size_t)SUFFIX_LEN;
        char **tmp;

        if (len == 0) {
            continue; /* blank line: nothing to extract */
        }
        /* Grow through a temporary so the old block is not lost on failure. */
        tmp = realloc(arr, sizeof *arr * (n + 1));
        if (tmp == NULL) {
            perror("realloc");
            status = EXIT_FAILURE;
            break;
        }
        arr = tmp;
        /* calloc zero-fills, so the copied suffix is already terminated. */
        arr[n] = calloc(SUFFIX_LEN + 1, 1);
        if (arr[n] == NULL) {
            perror("calloc");
            status = EXIT_FAILURE;
            break;
        }
        memcpy(arr[n++], s + len - take, take);
    }
    fclose(f);
    for (i = 0; i < n; i++) {
        printf("%s\n", arr[i]);
        free(arr[i]);
    }
    free(arr);
    return status;
}
Output:
xxx
yyy
zzz
ppp
qqq
rrr
If you always want the last 3 characters you can simplify:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Same task as above, but every suffix has a fixed size, so the results
 * live in one contiguous array of char[4] instead of separate allocations.
 *
 * Lines shorter than three characters are stored whole; blank lines are
 * skipped. Returns EXIT_FAILURE if memory runs out, 0 otherwise.
 */
int main(void)
{
    enum { SUFFIX_LEN = 3 };
    FILE *f;
    char (*arr)[SUFFIX_LEN + 1] = NULL;
    char s[100];
    size_t i, n = 0;
    int status = 0;

    f = fopen("text.txt", "r");
    if (f == NULL) {
        perror("fopen");
        exit(EXIT_FAILURE);
    }
    while (fgets(s, sizeof s, f) != NULL) {
        /* Ignore the line terminator so the offset arithmetic below never
           depends on whether the line ended in '\n'. */
        size_t len = strcspn(s, "\r\n");
        size_t take = len < (size_t)SUFFIX_LEN ? len : (size_t)SUFFIX_LEN;
        char (*tmp)[SUFFIX_LEN + 1];

        if (len == 0) {
            continue; /* blank line: nothing to extract */
        }
        /* Grow through a temporary so the old block is not lost on failure. */
        tmp = realloc(arr, sizeof *arr * (n + 1));
        if (tmp == NULL) {
            perror("realloc");
            status = EXIT_FAILURE;
            break;
        }
        arr = tmp;
        memcpy(arr[n], s + len - take, take);
        arr[n][take] = '\0';
        n++;
    }
    fclose(f);
    for (i = 0; i < n; i++) {
        printf("%s\n", arr[i]);
    }
    free(arr);
    return status;
}

Related

How can I alphabetize the first column of a .csv file in C?

I have a .csv file. Let's say the data is like this:
Location 1,Location 2,Price,Rooms,Bathrooms,CarParks,Type,Area,Furnish
Upper-East-Side,New-York,310000,3,2,0,Built-up,1000,Partly
West-Village,New-York,278000,2,2,0,Built-up,1000,Partly
Theater-District,New-York,688000,3,2,0,Built-up,1000,Partly
Expected output (alphabetized):
Theater-District
Upper-East-Side
West-Village
How can I only show and alphabetize the first column (Location 1) of the file while also skipping the header?
This is currently my code but it's still in a "read and display" form.
#include <stdio.h>
/*
 * Echo "file.csv" to stdout while suppressing the text of the first
 * (header) line. The header's own newline is still echoed, so the output
 * starts with an empty line.
 */
int main()
{
    FILE *fh = fopen("file.csv", "r");
    if (fh == NULL) {
        printf("Error opening file.\n");
        return 0;
    }

    int line_number = 0;
    /* fgetc() returns an int: a char cannot represent EOF distinctly from a
       valid byte, which either ends the loop early or never ends it. */
    int c;
    while ((c = fgetc(fh)) != EOF) {
        if (line_number > 0 || c == '\n') {
            putchar(c);
        }
        if (c == '\n') {
            line_number++;
        }
    }
    fclose(fh);
    return 0;
}
csv is not a well defined format so I suggest you use an existing csv library instead of parsing the data yourself. For instance, this will not work if the first field has any embedded commas. It relies on scanf() to allocate the line, and resizes the lines array as needed. This means there are no arbitrary limits.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * qsort() comparator for an array of C-string pointers.
 *
 * qsort() hands the comparator pointers to the array ELEMENTS. The elements
 * are themselves char pointers, so each argument has to be dereferenced
 * once to reach the string it refers to.
 *
 * Returns <0, 0 or >0 following strcmp() on the two strings.
 */
int strcmp2(const void *a, const void *b) {
    const char *const *lhs = a;
    const char *const *rhs = b;
    return strcmp(*lhs, *rhs);
}
/*
 * Read the first comma-separated field of every line of "unsorted.csv",
 * sort all of them except the header (the first line) and print them.
 *
 * Relies on the POSIX "%m" scanf modifier, which allocates the buffer for
 * the matched field; each such buffer is released with free().
 * Returns 1 if the file cannot be opened or memory runs out, 0 otherwise.
 */
int main() {
    FILE *f = fopen("unsorted.csv", "r");
    if(!f) return 1;

    char **lines = NULL;
    size_t n = 0;
    int status = 0;
    for(;;) {
        char *location1;
        /* Capture field 1, discard the rest of the line and its newline. */
        int rv = fscanf(f, "%m[^,]%*[^\n]\n", &location1);
        if(rv != 1) break;
        char **tmp = realloc(lines, (n + 1) * sizeof *tmp);
        if(!tmp) {
            /* Keep the old array for cleanup; drop the field just read. */
            free(location1);
            status = 1;
            break;
        }
        lines = tmp;
        lines[n++] = location1;
    }
    fclose(f);

    /* Index 0 is the header. Sorting needs at least one data row; the guard
       also keeps n - 1 from wrapping around when the file is empty. */
    if(status == 0 && n > 1)
        qsort(&lines[1], n - 1, sizeof *lines, strcmp2);
    for(size_t i = 0; i < n; i++) {
        if(status == 0 && i > 0)
            printf("%s\n", lines[i]);
        free(lines[i]);
    }
    free(lines);
    return status;
}
It produces the expected output:
Theater-District
Upper-East-Side
West-Village
So, assuming some hard limits on line length and CSV file record count, we can just use arrays.
To read a record, just use fgets(). Add each line of text to the array using the usual method.
We use a simple string search and truncate to isolate the first field. (Assuming no fancy stuff like double-quoted fields. I assume you are doing homework.)
To sort everything except the CSV header record, use qsort() with a little additional mathematics.
#include <iso646.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define unused(x) (void)(x)
#define MAX_LINE_LENGTH 100
#define MAX_RECORD_COUNT 100
/* qsort() comparator: every element is one NUL-terminated record buffer. */
static int compare_records( const void * a, const void * b )
{
  return strcmp( a, b );
}

/*
 * Print the first field of every CSV record in the file named by argv[1],
 * sorted alphabetically, skipping the header record.
 *
 * At most MAX_RECORD_COUNT records of MAX_LINE_LENGTH-1 characters are
 * handled. Returns 1 when no filename is given or the file cannot be
 * opened, 0 otherwise.
 */
int main( int argc, char ** argv )
{
  unused( argc );
  char records[MAX_RECORD_COUNT][MAX_LINE_LENGTH];
  size_t record_count = 0;

  const char * filename = argv[1];
  if (!filename) return 1;

  // Read our records from file
  FILE * f = fopen( filename, "r" );
  if (!f) return 1;
  while ((record_count < MAX_RECORD_COUNT)
     and fgets( records[record_count], MAX_LINE_LENGTH, f ))
    record_count += 1;
  fclose( f );

  // Truncate the strings to just the first field. Cutting at the line
  // terminator as well keeps a record that has no comma from carrying the
  // newline that fgets() retains.
  for (size_t n = 0; n < record_count; n++)
    records[n][strcspn( records[n], ",\r\n" )] = '\0';

  // Sort everything but the header
  if (record_count > 2) // must exist at least two records + header
    qsort( records+1, record_count-1, MAX_LINE_LENGTH, compare_records );

  // Print everything but the header
  for (size_t n = 1; n < record_count; n++)
    printf( "%s\n", records[n] );

  return 0;
}

How do I add multiple strings to an array?

I want to read all .txt files in the directory and add those file names to an array. Catching the text files part is okay but I am having a problem storing those file names inside an array. What is the mistake I've done here? This is my code.
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <strings.h>
/* Asker's attempt: scan the current directory and keep the names that end
   in ".txt" in shades[], then print the stored names. */
int main()
{
DIR *p;
struct dirent *pp;
p = opendir ("./");
/* file_list is declared but never used below. */
char file_list[10][10];
/* Each slot holds at most 9 characters plus the terminator. */
char shades[10][10];
int i = 0;
if (p != NULL)
{
while ((pp = readdir (p))!=NULL) {
int length = strlen(pp->d_name);
/* NOTE(review): for names shorter than 4 characters this pointer starts
   before the beginning of d_name. */
if (strncmp(pp->d_name + length - 4, ".txt", 4) == 0) {
puts (pp->d_name);
/* NOTE(review): i is not advanced inside the loop, so every match is
   copied into shades[0]; strcpy does not check the 10-byte slot size. */
strcpy(shades[i], pp->d_name);
}
}
/* Runs once, after the loop has finished. */
i = i + 1;
(void) closedir (p);
/* Prints four slots no matter how many were filled; slots that were never
   written are uninitialized. This inner i shadows the outer one. */
for(int i=0; i<4; i++){
printf("\n %s", &shades[i]);
}
}
return(0);
}
The problem here seems to be that 'strings' in C work very differently than in C++, C# or Python.
To make it work you'll need to alloc memory for each string.
If I understood correctly what you want to do, here's an example:
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define FILE_LIST_MAX 10
/*
 * List every entry of the current directory, remember up to FILE_LIST_MAX
 * names that end in ".txt", and print the remembered names afterwards.
 *
 * Returns 1 if the pointer table cannot be allocated, 0 otherwise
 * (including when the directory cannot be opened, as before).
 */
int main()
{
    DIR *p = opendir("./");
    struct dirent *pp;
    char **file_list;
    int i = 0;

    if (p == NULL) {
        printf("Could not open current directory" );
        return 0;
    }

    /* Allocate only once the directory is open, so the early return above
       has nothing to release. */
    file_list = malloc(FILE_LIST_MAX * sizeof *file_list);
    if (file_list == NULL) {
        closedir(p);
        return 1;
    }

    while ((pp = readdir(p)) != NULL) {
        size_t length = strlen(pp->d_name);
        printf ("%s\n", pp->d_name);
        if (length <= 4 || memcmp(pp->d_name + length - 4, ".txt", 4) != 0) {
            continue;
        }
        if (i == FILE_LIST_MAX) {
            /* Table is full: keep listing entries but stop storing them. */
            fprintf(stderr, "file list full, skipping %s\n", pp->d_name);
            continue;
        }
        /* length + 1 leaves room for the terminating '\0'. */
        file_list[i] = malloc(length + 1);
        if (file_list[i] == NULL) {
            break;
        }
        memcpy(file_list[i], pp->d_name, length + 1);
        i++;
    }
    (void) closedir(p);

    printf("\n.txt filenames within array");
    for(int j = 0; j < i; j++) {
        printf("\n%s", file_list[j]);
    }
    for(int j = 0; j < i; j++) {
        free(file_list[j]);
    }
    free(file_list);
    return(0);
}
It's not perfect though as you must manually change file_list capacity in FILE_LIST_MAX
Here's an example output from executing this code:
A much better approach would be implementing a dynamic array of chars which automatically resizes as necessary.

Creating 1000 text files in C

I am learning C language. Here is a simple program I did to create 1000 text files.
#include <stdio.h>
#include <string.h>
char * create_filename(char *, char *, int);
/* Create no_of_files text files whose names are the prefix "file_no_"
   followed by the 1-based file number and ".txt"; each file receives one
   line of text. */
int main(void)
{
char prefix_name[50] = "file_no_";
/* Scratch buffer for the decimal file number: 5 bytes hold at most four
   digits plus the terminator, which is exactly enough for 1000. */
char snum[5];
int no_of_files = 1000;
FILE * fp = NULL;
for (int i = 0; i < no_of_files; i++)
{
/* NOTE(review): the fopen() result is passed to fprintf()/fclose()
   without a NULL check. */
fp = fopen( create_filename(prefix_name, snum, i + 1), "w");
fprintf(fp, "This is file no %d", i+1);
fclose(fp);
fp = NULL;
/* create_filename() appended to prefix_name in place, so the bare prefix
   is restored before the next iteration. */
strcpy(prefix_name, "file_no_");
}
return 0;
}
/*
 * Build a file name in place: append the decimal form of i and then ".txt"
 * to the string already stored in prefix_name.
 *
 * prefix_name  in/out buffer; must have room for the appended text.
 * snum         scratch buffer that receives the decimal digits of i.
 * i            number to embed in the name.
 *
 * Returns prefix_name.
 */
char * create_filename(char * prefix_name, char * snum, int i)
{
    char *tail = prefix_name + strlen(prefix_name);

    sprintf(snum, "%d", i);
    strcpy(tail, snum);
    strcat(tail, ".txt");
    return prefix_name;
}
This runs as expected. But I want to know, how can I make this more efficient and as portable as possible. If I want to scale this up to, say 10000 text files, are there other approaches which will be better ?
Thanks
how can I make this more efficient and as portable as possible.
More error checking. Example: a failed fopen() can readily occur.
Realize that a huge amount of time will occur in fopen() and local code likely will have scant time improvements.
Avoid re-writing the prefix.
Use a helper function.
Example:
// Create `count` files named <prefix><k><suffix> for k = 1..count, each
// containing the text "This is file no <k>".
//
// Returns the number of files written successfully. Processing stops at
// the first file that cannot be opened, written or closed. Returns 0 when
// prefix or suffix is NULL, when count is not positive, or when the name
// buffer cannot be allocated.
int create_many_files(const char *prefix, int count, const char *suffix) {
  if (prefix == NULL || suffix == NULL || count <= 0) {
    return 0;
  }

  // `count` has the most digits of any number used, so this is the longest
  // "<k><suffix>" tail that will ever be formatted.
  size_t prefix_len = strlen(prefix);
  int tail_len = snprintf(NULL, 0, "%d%s", count, suffix);
  if (tail_len < 0) {
    return 0;
  }

  char *filename = malloc(prefix_len + (size_t)tail_len + 1u);
  if (filename == NULL) {
    return 0;
  }
  // The prefix is written once; only the tail is re-formatted per file.
  memcpy(filename, prefix, prefix_len);
  char *offset = filename + prefix_len;

  int written = 0;
  for (int i = 0; i < count; i++) {
    sprintf(offset, "%d%s", i + 1, suffix);
    FILE *fp = fopen(filename, "w");
    if (fp == NULL) {
      break;
    }
    int ok = fprintf(fp, "This is file no %d", i + 1) >= 0;
    // fclose() flushes buffered output, so its result decides whether the
    // data actually reached the file.
    if (fclose(fp) != 0) {
      ok = 0;
    }
    if (!ok) {
      break;
    }
    written++;
  }
  free(filename);
  return written;
}
Other potential error checks:
prefix == NULL
count < 0
suffix == NULL
fprintf() < 0
fclose() != 0

How to loop a nested array in C

I've been developing a guessing game in which the goal is to guess the character selected by the user from among specific characters. My first and only idea is to create an array with the questions to be asked, where each question has its own options, as in the code below. I'm a newbie in the C language, so there are several things I'm not sure how to handle. In short, I'd like to know how I can loop over the array, showing the user each question together with its options to be answered. Here's the code.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#define ROW 500
#define LINE 200
// Read the whole of "file.txt" into a freshly allocated, NUL-terminated
// buffer.
//
// Returns the buffer (the caller must free() it), or NULL when the file
// cannot be opened, its size cannot be determined, memory cannot be
// allocated, or reading fails.
char *characters(){
    char *source = NULL;
    long bufsize = -1;

    FILE *fp = fopen("file.txt", "r");
    if (fp == NULL) {
        return NULL;
    }

    /* Seek to the end to learn the size, then return to the start. */
    if (fseek(fp, 0L, SEEK_END) == 0) {
        bufsize = ftell(fp);
    }
    if (bufsize >= 0 && fseek(fp, 0L, SEEK_SET) == 0) {
        /* One extra byte for the terminator. */
        source = malloc((size_t)bufsize + 1);
    }

    if (source != NULL) {
        size_t newLen = fread(source, sizeof(char), (size_t)bufsize, fp);
        if (ferror(fp) != 0) {
            fputs("Error reading file", stderr);
            /* Never hand back a buffer that was left unterminated. */
            free(source);
            source = NULL;
        } else {
            /* fread() may return fewer bytes than the size reported by
               ftell(), so terminate at what was really read. */
            source[newLen] = '\0';
        }
    }

    fclose(fp);
    return source;
}
/*
 * Duplicate at most n characters of s into a newly allocated string.
 *
 * The copy is always NUL-terminated. Only as many bytes as the copy needs
 * are allocated, so a large n (even SIZE_MAX) cannot overflow the size
 * computation or waste memory.
 *
 * Returns the new string (the caller must free() it) or NULL if the
 * allocation fails.
 */
char *strndup(const char *s, size_t n) {
    size_t len = 0;

    /* Length of s, capped at n. */
    while (len < n && s[len] != '\0') {
        len++;
    }

    char *p = malloc(len + 1);
    if (p != NULL) {
        memcpy(p, s, len);
        p[len] = '\0';
    }
    return p;
}
// User input
// Read one line (at most 9 characters, including any newline) from stdin
// and return a heap-allocated copy of it.
//
// Returns NULL on end of input, on a read error, or if the copy cannot be
// allocated; otherwise the caller must free() the result.
char *input(){
    char buffer[10];

    /* Without this check a failed read would return an uninitialized
       pointer. */
    if (fgets(buffer, sizeof buffer, stdin) == NULL) {
        return NULL;
    }
    return strndup(buffer, sizeof buffer);
}
// Main function
// Picks each of four question indexes once, in random order, prints the
// selected entry, reads one answer per question through input(), and
// finally prints the collected answers.
int main (void)
{
// NOTE(review): the elements of this array are char *, so the
// brace-enclosed option lists are not valid initializers for them; this
// declaration is what the question is about.
char *questions[] = {
"Genre",{"male","female"},
"Hair", {"black","red","blond"},
"Cloths",{"dress","shirt","pants"},
"pet", {"dog","cat","pig"}
};
// asked[q] becomes non-zero once index q has been used.
int asked[4] = {0};
// Five slots are declared; the loops below use four of them.
char *answers[5];
// buffer is declared but not used below.
char buffer[6];
srand(time(NULL));
for (int i = 0; i < 4; i++) {
int q = rand() % 4;
// Draw again until an index that has not been used yet comes up.
while (asked[q])
q = rand() % 4;
asked[q]++;
printf ("%s\n", questions[q]);
// input() returns heap memory; it is not freed before the program exits.
answers[i] = input();
}
for(int i = 0; i < 4; i++)
{
printf(" %s ",answers[i]);
}
return 0;
}
That's the file's structure I'll compare as long as I have all the answers from the user.
female,blond,vestido,pig,character b
male,black,shirt,pants,dog,character c
male,black,shirt,pants,cat,character d
female,blond,dress,cat,character A
male,red,shirt,pants,pig,character e

Reading and writing from files with a new form

I'm trying to read from a file and I have to use a new form of it I'm not really certain how to use. I've posted the code below of what function I have and I'm not sure what to do about this error and how to fix it?
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
double* read_file(FILE* file, int length);
/* Asker's attempt: take an element count and a file name from the command
   line, read the values through read_file() and print the resulting
   pointer. */
int main(int argc, char* argv[])
{
/* NOTE(review): this allocation is replaced by the read_file() result
   below and is never freed. */
double* array = malloc(10 * sizeof(double));
/* NOTE(review): argv[1] and argv[2] are read before argc is checked. */
int length = atoi(*(argv + 1));
/* NOTE(review): argv[2] is a char *, not a FILE *; this is the line the
   compiler rejects. */
FILE* file = *(argv + 2);
/* Accepts only an argc of exactly 4. */
if (argc < 4 || argc > 4)
{
printf("Insufficient arguments. Check your command line.\n");
return 0;
}
array = read_file(file, length);
/* Prints the address of the array, not its contents. */
printf("%p", array);
return 0;
}
/* Asker's attempt: open the given file and read `length` doubles into a
   newly allocated array. Returns the array, or 0 when the file cannot be
   opened. */
double* read_file (FILE* file, int length)
{
FILE* ptr;
double* array = malloc(length * sizeof(double));
int i = 0;
/* NOTE(review): fopen() takes a file name (const char *), but `file` is
   declared as FILE *. */
if ((ptr = fopen(file, "r")) == NULL)
{
/* NOTE(review): `array` was allocated above and is not freed on this
   path. */
return 0;
}
else
{
for (i = 0; i < length; i++)
{
/* The fscanf() result is not checked, so a short or malformed file leaves
   the remaining elements unset. */
fscanf(ptr, "%lf", (array + i));
}
}
fclose(ptr);
return array;
}
First of all, you're trying to assign a char string to a variable of type pointer to FILE. The compiler won't let you do that.
// not allowed
FILE* file = *(argv + 2);
Secondly, you're passing a pointer to FILE to fopen(), but fopen() expects its first argument to be a char string, so that won't work either.
// file is FILE*, not allowed
ptr = fopen(file, "r"));
If you fix those two lines the code should compile.
fix like this
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
//double* read_file(FILE* file, int length);
//You do not need to pass a file pointer, you need a file name.
//It opens the file within this function
double *read_file(const char *filename, int length);
// Usage: <program> number_of_elements file_name
// Reads number_of_elements doubles from file_name through read_file() and
// prints one value per line.
int main(int argc, char* argv[]){
//if (argc < 4 || argc > 4)
// The original test above is the same as argc != 4.
// Only argv[1] and argv[2] are used, so the expected argc is 3.
// Arguments have to be checked before they are used.
if (argc != 3) {
printf("Usage : %s number_of_elements file_name\n", argv[0]);
printf("Insufficient arguments. Check your command line.\n");
return 0;
}
//double* array = malloc(10 * sizeof(double));// Not needed: read_file() allocates the array, so this allocation would only leak.
int length = atoi(argv[1]);
const char *file = argv[2];
double *array = read_file(file, length);
// read_file() returns NULL when the file cannot be opened.
if(array != NULL){
for(int i = 0; i < length; ++i)
printf("%f\n", array[i]);
free(array);
}
return 0;
}
/*
 * Read up to `length` doubles from the text file named `file`.
 *
 * Returns a newly allocated array of `length` elements (the caller must
 * free() it), or NULL when `file` is NULL, `length` is not positive, the
 * file cannot be opened, or memory cannot be allocated. If the file holds
 * fewer values than requested, a message is printed and the remaining
 * elements stay zero-filled, so the caller never reads uninitialized
 * memory.
 */
double* read_file (const char *file, int length){
    if (file == NULL || length <= 0) {
        return NULL;
    }

    FILE *ptr = fopen(file, "r");
    if (ptr == NULL) {
        return NULL;
    }

    /* Allocate only after the file is open, so a failed open cannot leak.
       calloc zero-fills the elements a short file would leave unread. */
    double *array = calloc((size_t)length, sizeof *array);
    if (array != NULL) {
        for (int i = 0; i < length; i++) {
            if (1 != fscanf(ptr, "%lf", array + i)) {
                printf("Failed to read the %ith element.\n", i+1);
                break;
            }
        }
    }

    fclose(ptr);
    return array;
}

Resources