how to search an element from a file in c - c

My code needs to do three things:
Read numbers from a file FILE1 into an array (dynamic)
Sort those numbers
Search for numbers input from a FILE2 in the sorted array.
.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
/*
 * Reads integers from "myFile1.txt" into a heap buffer B (initial capacity
 * 32 ints) and echoes each stored value to "test1.txt". When the buffer is
 * full, a buffer twice as large is allocated, the old contents are copied,
 * and the sort and print loops below run inside that same branch.
 */
int main (int argc, char *argv[]) {
FILE *fp1 = fopen ("myFile1.txt", "r");
if (fp1 == NULL) {
printf ("cannot open this file");
/* NOTE(review): exits with status 0 although the input could not be opened. */
exit (0);
}
FILE *fp2 = fopen ("test1.txt", "w");
if (fp2 == NULL) {
puts ("Not able to open this file");
exit (1);
}
int i = 0, num, j, k;
int *B = NULL;
int *C;
int a;
int size = 32;
/* The result of malloc is used without a NULL check. */
B = malloc (sizeof (int) * size);
while (fscanf (fp1, "%d", &num) == 1) {
if (i < size) {
/* Room left: store the value and echo it to the output file. */
B[i] = num;
fprintf (fp2, "%d\r\n", num);
i++;
}
else {
/* Buffer full: allocate twice the capacity and copy the old contents. */
C = malloc (sizeof (int) * 2 * size);
memcpy (C, B, size * sizeof (int));
free (B);
B = &C[0];
/* The value stored here is not written to fp2, unlike the branch above. */
B[i] = num;
i++;
size = size * 2;
/* i is incremented a second time in this branch, so one slot is skipped. */
i++;
/*
 * The loops run up to the new, doubled size, so they also visit slots
 * that have not been assigned a value yet.
 */
for (j = 0; j < size; ++j) {
for (k = j + 1; k < size; ++k) {
/* Swapping when B[j] < B[k] moves larger values to the front. */
if (B[j] < B[k]) {
/* a is an int, but it is assigned the address &B[j], not the value B[j]. */
a = &B[j];
B[j] = B[k];
B[k] = a;
}
}
}
printf ("after sorting");
for (j = 0; j < size; ++j)
printf ("%d\n", B[j]);
}
}
return 0;
fclose (fp1); /* note this code is never reached */
fclose (fp2);
}
I successfully complete the first part of reading in the numbers from a file. But I am not able to understand how to sort these numbers.
I am trying to apply bubble sort, but it puts 0s in my array. How is my implementation incorrect?

& is the address-of operator, so &B[j] yields a pointer to the element rather than its value. Since a is an int, you need a = B[j].
Now you sort the numbers descending, if you want them to be ascending change the < to > in if (B[j] < B[k]).
Also you must always check whether malloc succeeded or not with e.g.:
if (!B) {
fprintf(stderr,"B alloc error");
exit(-1);
}
Also you might want to consider realloc.
In addition there is a built-in qsort in stdlib.h, which gives much better time than O(n^2).
Note: I haven't tested your file operations, since you said they work properly.

Related

Segmentation fault when trying to read a file with 4000^2 characters

I'm writing a program to read numbers from a .txt file and put them into a 2-dimensional matrix that I can use for matrix multiplication, but at this point I'm having a lot of trouble getting the portion of my code that scans the file to work properly. I have two randomly generated matrices that I'm using. For the smaller one, it reads the first 400 values but then the rest of the array is zeros. For the larger one, which is 4000x4000, it just throws a segmentation fault without even going into main. Any ideas what could be causing this? I change ARRAY_SIZE to whatever the array length and width are.
#include <stdio.h>
#include <stdlib.h>
#define ARRAY_SIZE 4000
/*
 * Expects four arguments: thread count, two input file names and an output
 * file name. Reads two dimension values from each input file, then reads
 * matrix values into two fixed ARRAY_SIZE x ARRAY_SIZE int arrays and prints
 * the thread count and both dimension pairs.
 */
int main(int argc, char *argv[]) {
// Form to read: ./programname #ofthreads inputfilename1 inputefilename2 outputfilename
if(argc != 5) {
printf("Error! usage: ./programname #ofthreads inputfilename1 inputfilename2 outputfilename");
return (EXIT_FAILURE);
}
// get number of threads
int numThreads = atoi(argv[1]);
// make file pointers
FILE *fp1;
FILE *fp2;
// assign pointer to file name
fp1 = fopen(argv[2], "r");
fp2 = fopen(argv[3], "r");
// Error Handling if file doesn't exist
if (fp1 == NULL) {
printf("Error: File 1 does not exist. ");
return (EXIT_FAILURE);
}
if (fp2 == NULL) {
printf("Error: File 2 does not exist. ");
return (EXIT_FAILURE);
}
// initialize arrays
// Both arrays are automatic variables of main, each holding
// ARRAY_SIZE * ARRAY_SIZE ints.
int array1[ARRAY_SIZE][ARRAY_SIZE] = {0};
int array2[ARRAY_SIZE][ARRAY_SIZE] = {0};
// initialize dimension ints
int size1[2];
int size2[2];
// Get Dimensions
// The fscanf return values are not checked, so a failed read goes unnoticed.
fscanf(fp1,"%d ",&size1[0]);
fscanf(fp1,"%d \n", &size1[1]);
fscanf(fp2,"%d ",&size2[0]);
fscanf(fp2,"%d \n", &size2[1]);
int length1 = size1[0];
int width1 = size1[1];
int length2 = size2[0];
int width2 = size2[1];
// The inner (width) counter m is used as the first array index here.
for(int n = 0; n < length1; n++){
for(int m = 0; m < width1; m++){
fscanf(fp1, "%d ", &array1[m][n]);
}
}
for(int n = 0; n < length2; n++){
for(int m = 0; m < width2; m++){
// NOTE(review): array2 is filled from fp1 here, not from fp2.
fscanf(fp1, "%d ", &array2[m][n]);
}
}
// Process file here
// Close file
fclose(fp1);
fclose(fp2);
// With the element print commented out, this loop only emits one newline
// per outer iteration.
for(int n = 0; n < width1; n++){
for(int m = 0; m < length1; m++){
// printf("%d ", array1[m][n]);
}
printf("\n");
}
printf("Number of threads = %d\n", numThreads);
printf("Size1 = %d x %d\n", size1[0],size1[1]);
printf("Size2 = %d x %d\n", size2[0],size2[1]);
return 0;
}
2 x 4000 x 4000 ints will most probably take up more stack space than you have available. Allocate the memory dynamically using calloc instead (declared in stdlib.h):
// allocate space for ARRAY_SIZE elements of size int[ARRAY_SIZE] and zero the memory:
int(*array1)[ARRAY_SIZE] = calloc(ARRAY_SIZE, sizeof *array1);
int(*array2)[ARRAY_SIZE] = calloc(ARRAY_SIZE, sizeof *array2);
if(array1 == NULL || array2 == NULL) exit(1);
However, by the looks of it, you don't actually need all that memory in most cases since you get length1, width1, length2 and width2 from the files. Allocate the arrays after you've gotten that input from the files:
if(fscanf(fp1, " %d %d", &length1, &width1) != 2 ||
fscanf(fp2, " %d %d", &length2, &width2) != 2) exit(1);
int(*array1)[width1] = calloc(length1, sizeof *array1);
int(*array2)[width2] = calloc(length2, sizeof *array2);
if(array1 == NULL || array2 == NULL) exit(1);
Then use array1 and array2 just like you did before.
When you are done with them, free the allocated memory:
free(array1);
free(array2);
Process stack size is limited (few MiBs). It varies between systems based on OS implementation. If you need anything over that, better get it from heap (Memory management calls).
int rows = 4000;
int cols = 4000;
/* First allocate the table of row pointers... */
int **array = malloc(rows * sizeof *array);
if (!array) {
    perror("malloc1");
    exit(1);
}
/* ...then one separately allocated row of `cols` ints per table entry. */
for (int ri = 0; ri < rows; ++ri) {
    array[ri] = malloc(cols * sizeof *array[ri]);
    if (!array[ri]) {
        perror("malloc2");
        exit(2);
    }
}
Remember to free the allocated memory in reverse order. First the columns' (loop) then the rows'.
Edit:
3. Assuming you allocated both array1 & array2 using malloc() calls.
Reading array2 contents
fscanf(fp1, "%d ", &array2[m][n]);
shouldn't that be fp2?
Since you're going to do matrix multiplication you need to verify the order of matrices.
first write code for smaller matrix dimensions without malloc() & without reading data from files.

To mimic sort command of linux, to sort lines of a text file

The Linux sort command sorts the lines of a text file and writes the output to another file. My code tries to mimic it, but it gives a runtime error. Please help me fix the pointer mistakes so that it produces output.
In which line exactly should I make changes? Because there is no output after all.
I'm pasting the whole code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Sorts the first `linecount` strings of `arr` into ascending strcmp() order.
 *
 * The pointers in `arr` are left where they are; the characters are exchanged
 * between the buffers through a 500-byte scratch area, so every buffer must be
 * large enough to hold any of the other strings.
 */
void sortfile(char **arr, int linecount) {
    char scratch[500];
    int pass = 1;

    /* linecount - 1 full sweeps over all adjacent pairs. */
    while (pass < linecount) {
        for (int k = 0; k + 1 < linecount; k++) {
            char *left = arr[k];
            char *right = arr[k + 1];

            if (strcmp(left, right) <= 0)
                continue;
            strcpy(scratch, left);
            strcpy(left, right);
            strcpy(right, scratch);
        }
        pass++;
    }
}
/*
 * Counts the newline characters in "test1.txt", then tries to read the lines
 * into an array of 500-byte heap buffers, sort them with sortfile(), print
 * them and write them to "out.txt".
 */
int main() {
FILE *fileIN, *fileOUT;
fileIN = fopen("test1.txt", "r");
unsigned long int linecount = 0;
int c;
if (fileIN == NULL) {
/* NOTE(review): fclose() is called here with a NULL stream. */
fclose(fileIN);
return 0;
}
/* First pass: read to end of file, counting '\n' characters. */
while ((c = fgetc(fileIN)) != EOF) {
if (c == '\n')
linecount++;
}
/* linecount is unsigned long but is printed with %d. */
printf("line count=%d", linecount);
/* Variable-length array sized by the newline count. */
char *arr[linecount];
char singleline[500];
int i = 0;
/*
 * The stream is still positioned at end of file from the counting loop;
 * nothing rewinds it before this second read.
 */
while (fgets(singleline, 500, fileIN) != NULL) {
arr[i] = (char*)malloc(500);
strcpy(arr[i], singleline);
i++;
}
/* sortfile() and the loops below use linecount, not the number of lines stored (i). */
sortfile(arr, linecount);
for (i = 0; i < linecount; i++) {
printf("%s\n", arr[i]);
}
fileOUT = fopen("out.txt", "w");
if (!fileOUT) {
exit(-1);
}
for (i = 0; i < linecount; i++) {
fprintf(fileOUT, "%s", arr[i]);
}
fclose(fileIN);
fclose(fileOUT);
}
The problem in your code is you do not rewind the input stream after reading it the first time to count the number of newlines. You should add rewind(fileIN); before the next loop.
Note however that there are other problems in this code:
the number of newline characters may be less than the number of successful calls to fgets(): lines longer than 499 bytes will be silently broken in multiple chunks, causing more items to be read by fgets() than newlines. Also the last line might not end with a newline. Just count the number of successful calls to fgets().
You allocate 500 bytes for each line, which is potentially very wasteful. Use strdup() to allocate only the necessary size.
Swapping the lines in the sort routine should be done by swapping the pointers, not copying the contents.
allocating arr with malloc is safer and more portable than defining it as a variable sized array with char *arr[linecount];
Here is a modified version:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Sorts the first `linecount` pointers of `arr` so that the strings they
 * reference are in ascending strcmp() order.
 *
 * Only the pointers are exchanged; the string contents are never copied.
 * Sweeps over adjacent pairs are repeated until one sweep makes no exchange.
 */
void sortfile(char **arr, int linecount) {
    int dirty;

    do {
        dirty = 0;
        for (int k = 0; k + 1 < linecount; k++) {
            if (strcmp(arr[k], arr[k + 1]) <= 0)
                continue;
            char *held = arr[k];
            arr[k] = arr[k + 1];
            arr[k + 1] = held;
            dirty = 1;
        }
    } while (dirty != 0);
}
/*
 * Reads "test1.txt" line by line (chunks of at most 499 characters), sorts
 * the lines with sortfile(), prints them to stdout and writes them to
 * "out.txt".
 *
 * Returns 0 on success and 1 on any failure. All heap memory and both
 * streams are released on every path.
 */
int main() {
    FILE *fileIN, *fileOUT;
    char singleline[500];
    char **arr;
    int i, linecount;
    int status = 1;

    fileIN = fopen("test1.txt", "r");
    if (fileIN == NULL) {
        fprintf(stderr, "cannot open %s\n", "test1.txt");
        return 1;
    }
    /* First pass: count the chunks fgets() will deliver. */
    linecount = 0;
    while (fgets(singleline, sizeof singleline, fileIN)) {
        linecount++;
    }
    printf("line count=%d\n", linecount);
    /*
     * Ask for at least one slot: malloc(0) may legitimately return NULL,
     * which would otherwise be reported as an allocation failure for an
     * empty input file.
     */
    arr = malloc(sizeof(*arr) * (linecount > 0 ? (size_t)linecount : 1));
    if (arr == NULL) {
        fprintf(stderr, "memory allocation failure\n");
        fclose(fileIN);
        return 1;
    }
    /* Second pass: duplicate each chunk into its own heap string. */
    rewind(fileIN);
    for (i = 0; i < linecount && fgets(singleline, sizeof singleline, fileIN) != NULL; i++) {
        arr[i] = strdup(singleline);
        if (arr[i] == NULL) {
            fprintf(stderr, "memory allocation failure\n");
            fclose(fileIN);
            linecount = i; /* only the first i entries are owned */
            goto cleanup;
        }
    }
    fclose(fileIN);
    if (i != linecount) {
        fprintf(stderr, "line count mismatch: i=%d, linecount=%d\n",
                i, linecount);
        linecount = i;
    }
    sortfile(arr, linecount);
    for (i = 0; i < linecount; i++) {
        printf("%s", arr[i]);
    }
    fileOUT = fopen("out.txt", "w");
    if (!fileOUT) {
        fprintf(stderr, "cannot open %s\n", "out.txt");
        goto cleanup;
    }
    for (i = 0; i < linecount; i++) {
        fprintf(fileOUT, "%s", arr[i]);
    }
    /* fclose() flushes buffered output, so a write error can surface here. */
    if (fclose(fileOUT) != 0) {
        fprintf(stderr, "error writing %s\n", "out.txt");
        goto cleanup;
    }
    status = 0;

cleanup:
    for (i = 0; i < linecount; i++) {
        free(arr[i]);
    }
    free(arr);
    return status;
}
To get a different sort order, you would change the comparison function. Instead of strcmp() you could use this:
#include <ctype.h>
/*
 * Compares two strings lexicographically, but with the case of the first
 * differing characters transposed (lowercase letters are compared as
 * uppercase and vice versa).
 *
 * Returns 0 if the strings are equal, -1 if s1 sorts before s2 under this
 * ordering and 1 otherwise.
 */
int my_strcmp(const char *s1, const char *s2) {
    /* unsigned char keeps the values valid arguments for the <ctype.h> functions */
    unsigned char c, d;

    /* skip the common prefix */
    while ((c = *s1++) == (d = *s2++)) {
        if (c == '\0')
            return 0;   /* strings are equal */
    }
    /* transpose case of c */
    if (islower(c)) {
        c = toupper(c);
    } else {
        c = tolower(c);
    }
    /* transpose case of d */
    if (islower(d)) {
        d = toupper(d);
    } else {
        d = tolower(d);
    }
    /* on ASCII systems, we should still have c != d */
    /* return comparison result */
    if (c <= d) {
        return -1;
    } else {
        return 1;
    }
}

Sort ints from a txt file

I need to sort ints from a file in ascending order and print them to the standard output. I can't modify the structure of the file.
The txt file looks like this:
41
65
68
35
51
...(one number in a row)
My program works just fine for small files, but I have to optimize it for larger files (like 3 million numbers) using malloc, and I don't know exactly where and how. I'd like to ask for help with this. (I'm a beginner.)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER 100000
/*
 * Sorts the first `size` elements of `arr` into ascending order in place
 * using bubble sort.
 *
 * Stops as soon as a full pass makes no swap, so already-sorted input costs
 * a single pass. Always returns 0; the original fell off the end of a
 * non-void function without returning a value.
 */
int sort(int size, int arr[])
{
    for (int i = 0; i < size - 1; i++)
    {
        int swapped = 0;
        /* after pass i the last i elements are already in their final place */
        for (int j = 0; j < size - i - 1; j++)
        {
            if (arr[j] > arr[j + 1])
            {
                int swap = arr[j];
                arr[j] = arr[j + 1];
                arr[j + 1] = swap;
                swapped = 1;
            }
        }
        if (!swapped)
        {
            break;
        }
    }
    return 0;
}
/*
 * Reads one integer per line from the file named by argv[1] into a fixed
 * array of BUFFER ints, sorts it with sort() and prints the values one per
 * line.
 *
 * Exits with status 1 if no file name is given, the file cannot be opened,
 * or the file holds more than BUFFER numbers.
 */
int main(int argc, char *argv[])
{
    /* validate argc before touching argv[1] */
    if (argc < 2)
    {
        fprintf(stderr, "Error! Input then name of a .txt file\n");
        exit(1);
    }
    char *filename = argv[1];
    char s[20];
    FILE *fp = fopen(filename, "r");
    if (fp == NULL)
    {
        fprintf(stderr, "Error! Can't open %s\n", filename);
        exit(1);
    }
    int arr[BUFFER];
    int size = 0;
    /* the limit passed to fgets must be the size of s, not of arr */
    while ((fgets(s, sizeof s, fp)) != NULL)
    {
        if (size == BUFFER)
        {
            fprintf(stderr, "Error! More than %d numbers in %s\n", BUFFER, filename);
            fclose(fp);
            exit(1);
        }
        /*
         * atoi() stops at the first non-digit, so the trailing newline needs
         * no stripping; blindly overwriting the last character would chop a
         * digit off a final line that lacks a newline.
         */
        arr[size] = atoi(s);
        ++size;
    }
    fclose(fp);
    sort(size, arr);
    for (int i = 0; i < size; ++i)
    {
        printf("%d\n", arr[i]);
    }
    return 0;
}
Your program could look like this:
#include <stdlib.h>
#include <stdio.h>
/*
 * qsort() comparison callback for int elements.
 * Returns -1, 0 or 1 when the first value is less than, equal to or greater
 * than the second. Uses comparisons rather than subtraction, which could
 * overflow.
 */
static int numcompar(const void *a, const void *b) {
    int lhs = *(const int *)a;
    int rhs = *(const int *)b;

    if (lhs < rhs)
        return -1;
    if (lhs > rhs)
        return 1;
    return 0;
}
/*
 * Reads whitespace-separated integers from the file named by argv[1],
 * sorts them with qsort() and prints them one per line.
 * Aborts if no file name is given, the file cannot be opened, or memory
 * runs out.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        // TODO: handle error
        abort();
    }
    const char *path = argv[1];
    FILE *in = fopen(path, "r");
    if (!in) {
        abort();
    }

    // realloc(NULL, ...) behaves like malloc(), so the array can start empty
    int *values = NULL;
    size_t count = 0;

    // fscanf keeps the parsing simple; the array grows by one int per number
    for (int parsed = 0; fscanf(in, "%d", &parsed) == 1; count++) {
        int *grown = realloc(values, sizeof(*values) * (count + 1));
        if (!grown) {
            free(values);
            fclose(in);
            abort();
        }
        values = grown;
        values[count] = parsed;
    }
    fclose(in);

    qsort(values, count, sizeof(*values), numcompar);

    size_t k = 0;
    while (k < count) {
        printf("%d\n", values[k]);
        ++k;
    }
    free(values);
}
Note that reallocating for one int at a time is inefficient - realloc is usually a costly function. The next step would be to track the allocated size of the array and the number of "used" (assigned) elements separately, and to grow the array by a ratio greater than 1. Some people prefer to use the golden ratio in such cases.
To read an undetermined number of entries from the input file, you can allocate and reallocate an array using realloc() as more entries are read. For better performance it is recommended to increase the allocated size by a multiple instead of increasing linearly, especially one entry at a time.
Your sorting routine is inappropriate for large arrays: bubble sort has quadratic time complexity, so it might take a long time for 3 million items. Use qsort() with a simple comparison function for this.
Here is a modified program:
#include <stdio.h>
#include <stdlib.h>
/*
 * qsort() comparison callback for int elements.
 * Returns -1 if *pa < *pb, 0 if they are equal and +1 if *pa > *pb.
 */
static int compare_int(const void *pa, const void *pb) {
    const int *lhs = pa;
    const int *rhs = pb;

    if (*lhs == *rhs)
        return 0;
    return *lhs < *rhs ? -1 : 1;
}
/*
 * Reads one number per line from the file named by argv[1] into a heap
 * array that grows geometrically, sorts it with qsort() and prints the
 * values one per line.
 * Exits with status 1 on a missing argument, an unreadable file or an
 * allocation failure.
 */
int main(int argc, char *argv[]) {
    if (argc == 1) {
        fprintf(stderr, "Error! Input then name of a .txt file\n");
        exit(1);
    }
    char *filename = argv[1];
    FILE *fp = fopen(filename, "r");
    if (!fp) {
        fprintf(stderr, "Error! Can't open %s\n", filename);
        exit(1);
    }

    char line[80];
    int *numbers = NULL;
    size_t used = 0;
    size_t capacity = 0;

    /* read the numbers, one fgets() chunk at a time */
    while (fgets(line, sizeof line, fp) != NULL) {
        if (used == capacity) {
            /* grow by a factor of about 1.625, and by at least 32 slots */
            size_t wanted = capacity + capacity / 2 + capacity / 8 + 32;
            int *grown = realloc(numbers, wanted * sizeof(*numbers));
            if (!grown) {
                printf("cannot allocate space for %zu numbers\n", wanted);
                free(numbers);
                fclose(fp);
                exit(1);
            }
            numbers = grown;
            capacity = wanted;
        }
        numbers[used] = strtol(line, NULL, 10);
        used++;
    }
    fclose(fp);

    /* sort, then print in ascending order */
    qsort(numbers, used, sizeof(*numbers), compare_int);
    size_t k = 0;
    while (k < used) {
        printf("%d\n", numbers[k]);
        k++;
    }
    free(numbers);
    return 0;
}

Array elements are "lost" outside the function

I have a matrix in a file like:
3
1 2 3
4 5 6
7 8 -9
where the first line indicates the square matrix order. I'm using the following code to read the file and store it into a vector (I have removed all if checks for sake of simplicity):
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Reads a square matrix from the text file `fname`. The first line holds the
 * order n; each of the next n lines holds n numbers.
 *
 * `vector` is resized with realloc() to n*n doubles and filled row by row,
 * then printed. Because the pointer is received by value, the address
 * returned by realloc() is stored only in this function's local copy; the
 * caller's pointer is not updated.
 *
 * Returns n on success, -1 if a row cannot be read and 1 if an element
 * cannot be parsed. The results of fopen(), the first fgets(), the first
 * sscanf() and realloc() are not checked.
 */
int read_matrix_file(const char *fname, double *vector)
{
    /* Try to open file */
    FILE *fd = fopen(fname, "r");
    char line[BUFSIZ];
    fgets(line, sizeof line, fd);
    int n;
    sscanf(line, "%d", &n);
    vector = realloc(vector, n * n * sizeof *vector);
    memset(vector, 0, n * n * sizeof *vector);
    /* Reads the elements */
    int b;
    for (int i = 0; i < n; i++) {
        // Read the i-th line into line
        if (fgets(line, sizeof line, fd) == NULL) {
            perror("fgets");
            fclose(fd); /* do not leak the stream on the error path */
            return(-1);
        }
        /* Reads the j-th element of the i-th line into the vector */
        char *elem_ptr = line;
        for (int j = 0; j < n; j++) {
            /* %n stores how many characters were consumed, so the next
               sscanf() call can resume right after this element */
            if (sscanf(elem_ptr, "%lf%n", &vector[n*i+j], &b) != 1) {
                perror("sscanf");
                fclose(fd); /* do not leak the stream on the error path */
                return(1);
            }
            elem_ptr += b;
        }
    }
    fclose(fd);
    /* HERE PRINTS OK */
    for (int i = 0; i < n*n; i++)
        printf("%i %f\n", i, vector[i]);
    return n;
}
The read_matrix_file receives a filename and an array of doubles and fill the array, returning the matrix order. The expected usage can be seen in this code block.
/*
 * Allocates room for a single double, asks read_matrix_file() to fill it
 * from "matrix.txt", then prints n*n elements through the local pointer.
 */
int main(void)
{
const char *fname = "matrix.txt";
double *vector = malloc(sizeof * vector);
/* vector is passed by value: the callee receives a copy of the pointer. */
int n = read_matrix_file(fname, vector);
/* Here prints junk */
for(int i=0; i<n*n; i++)
printf("%i %f\n",i, vector[i]);
free(vector);
}
The issue is, the printf works fine inside the read_matrix_file but seems invalid in main.
I'm allocating the array outside the function and passing it by "reference", but I'm very suspicious of realloc. Unfortunately, I don't know how to fix it, or what a better approach would be.
You are reallocating memory inside read_matrix_file() and storing the elements of the matrix in that memory region. But since the pointer vector is a local variable of the function, its new value is lost when you leave the function.
When you come back inside main() vector still points to the (now likely invalid) memory region that you had previously allocated with malloc().
You should either allocate large enough memory before calling read_matrix_file or pass a double pointer (**) if you want to modify the pointer and see the change reflected back in main()
What I meant is something like this:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Reads a square matrix from the text file `fname`. The first line holds the
 * order n; each of the next n lines holds n numbers.
 *
 * *p_vector is resized with realloc() to n*n doubles and filled row by row;
 * the caller owns the buffer and must free() it. *p_vector may be NULL on
 * entry. If the file cannot be opened, the order is invalid or realloc()
 * fails, *p_vector is left untouched and still valid.
 *
 * Returns n (> 0) on success and -1 on any failure. The original returned 1
 * when an element could not be parsed, which callers could not tell apart
 * from a 1x1 matrix.
 */
int read_matrix_file(const char *fname, double **p_vector)
{
    /* Try to open file */
    FILE *fd = fopen(fname, "r");
    if (fd == NULL) {
        perror("fopen");
        return -1;
    }

    char line[BUFSIZ];
    int n;
    if (fgets(line, sizeof line, fd) == NULL ||
        sscanf(line, "%d", &n) != 1 || n <= 0) {
        fprintf(stderr, "%s: missing or invalid matrix order\n", fname);
        fclose(fd);
        return -1;
    }

    /* Keep the old pointer until realloc() is known to have succeeded. */
    size_t count = (size_t)n * (size_t)n;
    double *vector = realloc(*p_vector, count * sizeof *vector);
    if (vector == NULL) {
        perror("realloc");
        fclose(fd);
        return -1;
    }
    *p_vector = vector;
    memset(vector, 0, count * sizeof *vector);

    /* Reads the elements */
    int b;
    for (int i = 0; i < n; i++) {
        // Read the i-th line into line
        if (fgets(line, sizeof line, fd) == NULL) {
            fprintf(stderr, "%s: missing matrix row %d\n", fname, i);
            fclose(fd);
            return -1;
        }
        /* Reads the j-th element of the i-th line into the vector */
        char *elem_ptr = line;
        for (int j = 0; j < n; j++) {
            /* %n stores how many characters were consumed, so the next
               sscanf() call can resume right after this element */
            if (sscanf(elem_ptr, "%lf%n", &vector[n*i+j], &b) != 1) {
                fprintf(stderr, "%s: bad element at row %d, column %d\n",
                        fname, i, j);
                fclose(fd);
                return -1;
            }
            elem_ptr += b;
        }
    }
    fclose(fd);

    /* HERE PRINTS OK */
    for (int i = 0; i < n*n; i++)
        printf("%i %f\n", i, vector[i]);
    return n;
}
In main, call it with:
int n = read_matrix_file(fname, &vector);
EDIT: Please note that this code does not handle the failure of realloc() properly.

Find combination of groups and letters

I have to find a combination of groups of letters, second letter in first group should be the same as first letter in second group etc.
For example, solution for this group: AA, CB, AC, BA, BD, DB
is this: CB, BD, DB, BA, AA, AC
I have this code so far, it works, but if there is a lot of groups, it takes ages to compute. I need to make it more efficient.
In the input file, there's this input
10
C D
B C
B B
B B
D B
B B
C A
A B
B D
D C
My code
#include <stdio.h>
#include <stdlib.h>
/*
 * Recursively permutes the pairs in `group` by swapping entry `buffer` with
 * each later entry. When `buffer` reaches `sum` and the chain closes, the
 * adjacency of every consecutive pair is checked and, if all match, the
 * pairs are appended to "output.txt" and the program exits.
 *
 * NOTE(review): each row is declared with 2 chars, but the code addresses
 * columns 1 and 2 and rows 1..sum.
 */
void permutation(char group[][2], int buffer, int sum) {
int i, j;
char temp;
/* Closure test: first letter of pair 1 against second letter of pair sum. */
if (buffer == sum && group[1][1] == group[sum][2]) {
/* Stops at the first consecutive pair whose letters do not match. */
for (i = 1; i < sum; i++)
if (group[i][2] != group[i+1][1]) break;
/* i == sum means the loop above ran to completion without a mismatch. */
if (i == sum) {
FILE *output;
/* The fopen() result is not checked and the stream is not closed before exit(). */
output = fopen("output.txt", "a");
for (j = 1; j <= sum; j++) {
fprintf(output, "%c %c\n", group[j][1], group[j][2]);
}
exit(1);
}
} else {
for (i = buffer; i <= sum; i++) {
/* Swap pair `buffer` with pair i, one character at a time. */
temp = group[buffer][1];
group[buffer][1] = group[i][1];
group[i][1] = temp;
temp = group[buffer][2];
group[buffer][2] = group[i][2];
group[i][2] = temp;
permutation(group, buffer + 1, sum);
/* Undo the swap before trying the next candidate. */
temp = group[buffer][1];
group[buffer][1] = group[i][1];
group[i][1] = temp;
temp = group[buffer][2];
group[buffer][2] = group[i][2];
group[i][2] = temp;
}
}
}
/*
 * Reads the pair count and the pairs from "input.txt", then starts the
 * permutation search at position 1.
 */
int main() {
FILE *input;
/* The fopen() result is not checked before use. */
input = fopen("input.txt", "r");
int sum, i;
fscanf(input, "%d", &sum);
/* sum rows of 2 chars each; the loop below addresses rows 1..sum and columns 1..2. */
char group[sum][2];
for (i = 1; i <= sum; i++) {
/* %s stores the characters read plus a terminating '\0' at the given address. */
fscanf(input, "%s", &group[i][1]);
fscanf(input, "%s", &group[i][2]);
}
permutation(group, 1, sum);
}
EDIT So I have made some changes in my program (thanks to your help, I'm very new to programming so I'm sorry for mistakes), I use permutations no more and I'm just finding path. It works well, but now my input has 100000 groups and it takes a lot of time once again (about 2 hours and I need to make it done in 1 hour in maximal). I will probably have to do that in other way once again xD Any ideas ?
#include <stdio.h>
/*
 * Recursive path search: tries to extend path[0..buffer-1] with a pair index
 * i that is not yet in the path and whose first letter equals the second
 * letter of the last pair on the path.
 *
 * NOTE(review): declared to return int, but the only return statement
 * carries no value and control can also reach the end of the function.
 */
int find(char group[][2], int buffer, int sum, int path[]) {
int i, j;
for (i = 0; i < sum; i++) {
/* Linear scan of the current path; leaves j == buffer only if i is unused. */
for (j = 0; j < buffer; j++)
if (path[j] == i)
break;
if (buffer == 0 ||
(group[path[buffer-1]][1] == group[i][0] && buffer == j)) {
printf("%d\n", buffer); // just for me to know what program is currently computing
path[buffer] = i;
find(group, buffer + 1, sum, path);
/* A nonzero last slot is treated as the signal to stop searching. */
if (path[sum-1] != 0)
return;
}
}
}
/*
 * Reads the pair count and the pairs from "input.txt", runs find() starting
 * with an empty path, and appends the pairs to "output.txt" in path order.
 * Prints a message and does nothing else if the input file cannot be opened.
 */
int main() {
    FILE *input = fopen("input.txt", "r");
    if (input == NULL) {
        printf("Input file was not found.");
        return 0;
    }

    int sum;
    fscanf(input, "%d", &sum);
    char group[sum][2];
    int path[sum];

    /* read every pair and clear its path slot in the same sweep */
    for (int k = 0; k < sum; k++) {
        fscanf(input, " %c %c", &group[k][0], &group[k][1]);
        path[k] = 0;
    }

    find(group, 0, sum, path);

    FILE *output = fopen("output.txt", "a");
    for (int k = 0; k < sum; k++) {
        const char *pair = group[path[k]];
        fprintf(output, "%c %c\n", pair[0], pair[1]);
    }
    return 0;
}
In C array indices start at 0, so an array of size N has valid indices from 0 to N-1. In the code above you are accessing the array group out of bounds, since it has size 2 (valid indices are therefore 0 and 1), yet you are trying to access indices 1 and 2.
Either change:
char group[sum][2];
to:
char group[sum][3];
or use indices 0/1 rather than 1/2.
Note also that your code lacks error checking, e.g. on the call to fopen.
Your program has several issues:
you use 1 based indexing, which causes confusion and leads to referencing arrays and subarrays beyond their defined ends.
you parse the input with fscanf using the %s specifier: this is unsafe and will write 2 bytes for each of your inputs, writing beyond the end of each subarray and beyond the end of the last array.
You already know how to fix these, preferably by using 0 based indexing
Your algorithm is very ineffective, complexity O(n!) because you enumerate all possible permutations and check for validity only on complete permutations. You can drastically improve the performance by only enumerating permutations which already verify the constraint for their initial elements. The complexity is substantially lower, still quadratic but n is quite small.
Here is a modified version of your code that does this:
#include <stdio.h>
/* Exchanges the two letters of pair `a` with those of pair `b`. */
static void exchange_pairs(char a[2], char b[2]) {
    char first = a[0];
    char second = a[1];

    a[0] = b[0];
    a[1] = b[1];
    b[0] = first;
    b[1] = second;
}

/*
 * Backtracking search for a closed chain of pairs.
 *
 * group[0..buffer-1] is the chain built so far. Each later pair whose first
 * letter matches the second letter of group[buffer-1] is swapped into
 * position `buffer` and the search recurses. On success the chain is left
 * in `group`; on failure every swap is undone.
 *
 * Returns 1 if all `sum` pairs form a chain whose last letter equals the
 * first letter of group[0], otherwise 0.
 */
int permutation(char group[][2], int buffer, int sum) {
    if (buffer == sum)
        return group[sum - 1][1] == group[0][0];

    int candidate = buffer;
    while (candidate < sum) {
        if (group[candidate][0] == group[buffer - 1][1]) {
            exchange_pairs(group[buffer], group[candidate]);
            if (permutation(group, buffer + 1, sum) != 0)
                return 1;
            /* dead end: put the pair back before trying the next one */
            exchange_pairs(group[buffer], group[candidate]);
        }
        candidate++;
    }
    return 0;
}
/*
 * Reads the pair count and the pairs from "input.txt", searches for a closed
 * chain with permutation() and appends it to "output.txt".
 *
 * Exit status: 0 on success, 1 for invalid input or when no complete path
 * exists, 2 when a file cannot be opened.
 */
int main(void) {
    FILE *input = fopen("input.txt", "r");
    if (input == NULL) {
        printf("cannot open input file\n");
        return 2;
    }

    int sum;
    if (fscanf(input, "%d", &sum) != 1 || sum <= 0) {
        printf("invalid number of pairs\n");
        fclose(input);
        return 1;
    }

    char group[sum][2];
    for (int k = 0; k < sum; k++) {
        char *pair = group[k];
        if (fscanf(input, " %c %c", &pair[0], &pair[1]) != 2) {
            printf("incorrect input for pair number %d\n", k);
            fclose(input);
            return 1;
        }
    }
    fclose(input);

    /* the first pair stays fixed, so the search starts at position 1 */
    if (!permutation(group, 1, sum)) {
        printf("complete path not found\n");
        return 1;
    }

    FILE *output = fopen("output.txt", "a");
    if (output == NULL) {
        printf("cannot open output file\n");
        return 2;
    }
    for (int k = 0; k < sum; k++) {
        fprintf(output, "%c %c\n", group[k][0], group[k][1]);
    }
    fclose(output);
    return 0;
}
I modified other aspects of the code to improve efficiency and reusability:
input is checked for validity.
the recursive function stops and returns 1 when it finds a complete path. This allows the program to continue whether it found the path or not.
output is handled from the main function for consistency.
The above code solves the problem for the specified input with n=50 in less than 0.002 seconds on my laptop. It prints F C C E E F F E E E E E E E E E E B B F F E E A A F F C C A A A A E E F F C C E E E E E E E E E E B B C C E E E E F F E E F F F F E E C C E E E E E E B B F F A A D D A A C C C C E E E E E E B B D D F
EDIT I realized that, since you are looking for a full closed path, you do not need to try different possibilities for the first pair. main can call permutation with 1 instead of 0 and the permutation can be simplified as buffer can never be 0.
Your new code has some problems:
find is defined as returning int, but you return nothing. Indeed, you do not test whether you have found a complete path; you rely entirely on the assumption that at least one exists and that you have found it.
You do not test for path closure. You may find a closed path by chance, but you may also produce an unclosed path.
Using 2 loops to find the unused pairs is less efficient than using a temporary array used[sum].
The first pair is always the first, so you can simplify the find function a little.
Here is an improved version:
#include <stdio.h>
/*
 * Recursive search for a closed chain of pairs.
 *
 * path[0..buffer-1] holds the indices chosen so far (buffer must be >= 1)
 * and used[i] is nonzero for every index already on the path. Index 0 is
 * never offered as a candidate.
 *
 * Returns 1 once `sum` pairs are chained and the last letter equals the
 * first letter of group[0], leaving the chain in `path`; otherwise 0.
 */
int find(char group[][2], int buffer, int sum, int path[], unsigned char used[]) {
    const char tail = group[path[buffer - 1]][1];

    if (buffer == sum)
        return tail == group[0][0];

    int candidate = 1;
    while (candidate < sum) {
        if (used[candidate] == 0 && group[candidate][0] == tail) {
            path[buffer] = candidate;
            used[candidate] = 1;
            if (find(group, buffer + 1, sum, path, used) != 0)
                return 1;
            /* dead end: release the pair so other branches can use it */
            used[candidate] = 0;
        }
        candidate++;
    }
    return 0;
}
/*
 * Reads the pair count and the pairs from "input.txt", searches for a closed
 * chain starting at the first pair with find(), and appends the chain to
 * "output.txt".
 *
 * Returns 0 on success (and, as before, when the input file is missing),
 * 1 for invalid input or when no complete path exists, 2 when the output
 * file cannot be opened. Both streams are closed on every path.
 */
int main() {
    FILE *input = fopen("input.txt", "r");
    if (input == NULL) {
        printf("Input file was not found.");
        return 0;
    }

    int sum = 0, i;
    /* a non-positive count would make the VLA sizes below invalid */
    if (fscanf(input, "%d", &sum) != 1 || sum <= 0) {
        printf("invalid number of pairs\n");
        fclose(input);
        return 1;
    }

    char group[sum][2];
    int path[sum];
    unsigned char used[sum];
    for (i = 0; i < sum; i++) {
        if (fscanf(input, " %c %c", &group[i][0], &group[i][1]) != 2) {
            printf("incorrect input for pair number %d\n", i);
            fclose(input);
            return 1;
        }
    }
    fclose(input);

    path[0] = 0;    // always start at first element
    used[0] = 1;
    for (i = 1; i < sum; i++)
        used[i] = 0;

    if (!find(group, 1, sum, path, used)) {
        printf("complete path not found\n");
        return 1;
    }

    FILE *output = fopen("output.txt", "a");
    if (output == NULL) {
        printf("cannot open output file\n");
        return 2;
    }
    for (i = 0; i < sum; i++)
        fprintf(output, "%c %c\n", group[path[i]][0], group[path[i]][1]);
    fclose(output);
    return 0;
}
EDIT: I tested this new version with your large input file: it crashes on my laptop. The previous version with the permutation functions works like a charm, producing the complete path in 0.060 seconds. So there is a complete path and something is wrong with this find function.
There are few differences between the algorithms:
permutation uses less stack space: a single automatic array of size n*2 (200k) versus 3 automatic arrays total size n*(sizeof(int) + 3) (700k).
permutation uses fewer variables, so recursion uses less stack space, but both probably use more than 1 MB of stack space to recurse 100000 times.
find does more scans, where permutation swaps group pairs and always snaps the next one directly.
I reimplemented find without recursion and finally got it to produce a complete path. It is a different one and it takes much longer to compute, 3.5 seconds.
For larger input files, you definitely should not use recursion and you should even allocate the arrays from the heap with malloc.
Here is the non recursive code, using heap memory:
#include <stdio.h>
#include <stdlib.h>
/*
 * Iterative (non-recursive) backtracking search for a closed chain of pairs.
 *
 * The chain always starts with pair 0. On success, path[0..sum-1] holds the
 * indices of the pairs in chain order and the last pair's second letter
 * equals the first letter of pair 0.
 *
 * Returns 1 if a complete closed chain was found, otherwise 0. Also returns
 * 0, without touching `path` or `group`, when sum <= 0, and returns 0 when
 * the bookkeeping array cannot be allocated.
 */
int find(const char group[][2], int sum, int path[]) {
    if (sum <= 0)
        return 0;
    path[0] = 0;
    if (sum == 1)
        return group[0][1] == group[0][0];

    /* used[i] is nonzero while pair i sits on the path at a level below `buffer` */
    unsigned char *used = calloc((size_t)sum, sizeof(*used));
    if (used == NULL)
        return 0;

    /* `buffer` is the path position being filled, `i` the candidate pair */
    for (int buffer = 1, i = 1;; i++) {
        if (i == sum) {
            /* candidates exhausted at this level: back up one level */
            --buffer;
            if (buffer == 0) {
                free(used);
                return 0;
            }
            /* resume the scan just after the pair previously tried there */
            i = path[buffer];
            used[i] = 0;
        } else
        if (!used[i] && group[path[buffer-1]][1] == group[i][0]) {
            path[buffer] = i;
            if (buffer == sum - 1) {
                /* last position: accept only if the chain closes */
                if (group[i][1] == group[0][0]) {
                    free(used);
                    return 1;
                }
            } else {
                /* descend; i++ in the loop header restarts the scan at 1 */
                buffer++;
                used[i] = 1;
                i = 0;
            }
        }
    }
}
/*
 * Reads the pair count and the pairs from "input.txt" into heap arrays,
 * searches for a closed chain with find() and appends it to "output.txt".
 *
 * Returns 0 on success (and, as before, when the input file is missing),
 * 1 for invalid input, allocation failure or when no complete path exists,
 * 2 when the output file cannot be opened. Heap memory and both streams are
 * released on every path.
 */
int main() {
    FILE *input = fopen("input.txt", "r");
    if (input == NULL) {
        printf("Input file was not found.");
        return 0;
    }

    int sum = 0, i;
    int status = 1;
    char (*group)[2] = NULL;
    int *path = NULL;

    if (fscanf(input, "%d", &sum) != 1 || sum <= 0) {
        printf("invalid number of pairs\n");
        fclose(input);
        return 1;
    }

    group = calloc((size_t)sum, sizeof(*group));
    path = calloc((size_t)sum, sizeof(*path));
    if (group == NULL || path == NULL) {
        printf("memory allocation failure\n");
        fclose(input);
        goto cleanup;
    }

    for (i = 0; i < sum; i++) {
        if (fscanf(input, " %c %c", &group[i][0], &group[i][1]) != 2) {
            printf("incorrect input for pair number %d\n", i);
            fclose(input);
            goto cleanup;
        }
    }
    fclose(input);

    /* the cast adds the const qualification that find() declares */
    if (!find((const char (*)[2])group, sum, path)) {
        printf("complete path not found\n");
        goto cleanup;
    }

    FILE *output = fopen("output.txt", "a");
    if (output == NULL) {
        printf("cannot open output file\n");
        status = 2;
        goto cleanup;
    }
    for (i = 0; i < sum; i++)
        fprintf(output, "%c %c\n", group[path[i]][0], group[path[i]][1]);
    fclose(output);
    status = 0;

cleanup:
    free(path);
    free(group);
    return status;
}

Resources