I am trying to write a program to compile with Xeon Phi and it says there is a segmentation fault? I think it is when I try to fill the arrays with the getc function. I have written this code several different formats, and I understand that this might not be the most efficient, but I need to test it out to see if it will work by parallelizing it
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
//#include <omp.h>
int main()
{
struct stat buf1;
struct stat buf2;
FILE *fp1, *fp2;
int ch1, ch2;
clock_t elapsed;
char fname1[40], fname2[40];
printf("Enter name of first file:");
fgets(fname1, 40, stdin);
while (fname1[strlen(fname1) - 1] == '\n')
{
fname1[strlen(fname1) -1] = '\0';
}
printf("Enter name of second file:");
fgets(fname2, 40, stdin);
while (fname2[strlen(fname2) - 1] == '\n')
{
fname2[strlen(fname2) -1] = '\0';
}
fp1 = fopen(fname1, "rb");
if (fp1 == NULL)
{
printf("Cannot open %s for reading\n", fname1);
exit(1);
}
fp2 = fopen(fname2, "rb");
if (fp2 == NULL)
{
printf("Cannot open %s for reading\n", fname2);
exit(1);
}
stat(fname1, &buf1);
size_t size1 = buf1.st_size;
stat(fname2, &buf2);
size_t size2 = buf2.st_size;
printf("Size of file 1: %zd\n", size1);
printf("Size of file 2: %zd\n", size2);
elapsed = clock(); // get starting time
size_t smallest = 0;
if(size1 < size2)
{
smallest = size1;
}
else
{
smallest = size2;
}
printf("Smallest Value: %zu\n", smallest);
size_t i, j, k;
size_t data[smallest];
size_t arry1[smallest];
size_t arry2[smallest];
unsigned long long counter = 0;
for(i = 0; i < smallest; i++)
{
data[i] = 1;
arry1[i] = getc(fp1);
arry2[i] = getc(fp2);
}
//#pragma omp for //reduction(+:counter)
for(k = 0; k < smallest; k++)
{
if((arry1[k] ^ arry2[k]) == 0)
{
counter+= data[k];
}
}
fclose (fp1); // close files
fclose (fp2);
float percent = (float)counter / (float)smallest * 100.0f;
printf("Counter: %zu Total: %zu\n", counter, smallest);
printf("Percentage: %.2f%\n", percent);
elapsed = clock() - elapsed; // elapsed time
printf("That took %.2f seconds.\n", (float)elapsed/CLOCKS_PER_SEC);
return 0;
}
Thanks for your help in advance!
You cannot declare an array with a size that's not known at compile time:
int smallest;
smallest = .... // some computation
size_t data[smallest]; // this is wrong!
You should instead use malloc() to accomplish that:
size_t *data;
smallest = ... // whatever
data = malloc(smallest * sizeof(size_t));
This loop:
while (fname1[strlen(fname1) - 1] == '\n')
fname1[strlen(fname1) -1] = '\0';
will read off the start of the string if the line was blank (i.e. "\n"). Change while to if.
Also, check that smallest > 0 before declaring the VLAs.
It might be insightful to output the value of smallest, typical systems default to a stack size of somewhere between 1MB and 8MB, so perhaps you cause a stack overflow here. You could eliminate this possibility by using malloc, as ocho88 suggests (but without the bogus cast):
size_t *data = malloc(smallest * sizeof *data);
size_t *arry1 = malloc(smallest * sizeof *arry1);
size_t *arry2 = malloc(smallest * sizeof *arry2);
if ( !data || !arry1 || !arry2 )
// exit with out-of-memory error
I'm not sure why you use a size_t to store the result of getc.
If this does not solve the problem then it would be useful to identify which line is segfaulting. If you can't get a debugger working, then you can output (to stderr, or to stdout with fflush) to find out where it is getting up to.
Related
I need help to read the numbers of a .txt file and put them in an array. But only from the second line onwards. I'm stuck and don't know where to go from the code that i built.
Example of the .txt file:
10 20
45000000
48000000
56000000
#define MAX 50
int main (void){
FILE *file;
int primNum;
int secNum;
int listOfNumers[50];
int numberOfLines = MAX;
int i = 0;
file = fopen("file.txt", "rt");
if (file == NULL)
{
printf("Error\n");
return 1;
}
fscanf(file, "%d %d\n", &primNum, &secNum);
printf("\n1st Number: %d",primNum);
printf("\n2nd Number: %d",secNum);
printf("List of Numbers");
for(i=0;i<numberOfLines;i++){
//Count the number from the second line onwards
}
fclose(file);
return 0;
}
You just need a loop to keep reading ints from file and populate the listOfNumers array until reading an int fails.
Since you don't know how many ints there are in the file, you could also allocate the memory dynamically. Example:
#include <stdio.h>
#include <stdlib.h>
int main(void) {
FILE* file = fopen("file.txt", "rt");
if(file == NULL) {
perror("file.txt");
return 1;
}
int primNum;
int secNum;
if(fscanf(file, "%d %d", &primNum, &secNum) != 2) {
fprintf(stderr, "failed reading primNum and secNum\n");
return 1;
}
unsigned numberOfLines = 0;
// allocate space for one `int`
int* listOfNumers = malloc((numberOfLines + 1) * sizeof *listOfNumers);
// the above could just be:
// int* listOfNumers = malloc(sizeof *listOfNumers);
while(fscanf(file, "%d", listOfNumers + numberOfLines) == 1) {
++numberOfLines;
// increase the allocated space by the sizeof 1 int
int* np = realloc(listOfNumers, (numberOfLines + 1) * sizeof *np);
if(np == NULL) break; // if allocating more space failed, break out
listOfNumers = np; // save the new pointer
}
fclose(file);
puts("List of Numbers:");
for(unsigned i = 0; i < numberOfLines; ++i) {
printf("%d\n", listOfNumers[i]);
}
free(listOfNumers); // free the dynamically allocated space
}
There are a few ways to approach this; if you know the size of the first line, you should be able to use fseek to move the position of the file than use getline to get each line of the file:
int fseek(FILE *stream, long offset, int whence);
The whence parameter can be:
SEEK_SET : the Beginning
SEEK_CUR : the current position
SEEK_END : the End
The other option would to encapsulate the entire file read in a while loop:
char *line = NULL;
size_t linecap = 0;
ssize_t linelen;
int counter = 0;
while((linelen = getline(&line, &linecap, file)) != -1){
if counter == 0{
sscanf(line, "%d %d\n", &primNum, &secNum);
}else{
//Process your line
}
counter++; //This would give you your total line length
}
I have a large file containing floating point numbers and I want to read them.
52.881 49.779 21.641 37.230 23.417 7.506 120.190 1.240 79.167 82.397 126.502 47.377 112.583 124.590 103.339 5.821 24.566 38.916 42.576
This is just the beggining of the file. It has 10000000 numbers.
I got this code but I don't know how to print the numbers.
#include <stdio.h>
#include <stdlib.h>
#include <err.h>
#include <fcntl.h>
#include <sysexits.h>
#include <unistd.h>
int main()
{
int fd;
size_t bytes_read, bytes_expected = 1000000*sizeof(double);
double *data;
char *infile = "file.dat";
if ((fd = open(infile,O_RDONLY)) < 0)
err(EX_NOINPUT, "%s", infile);
if ((data = malloc(bytes_expected)) == NULL)
err(EX_OSERR, "data malloc");
bytes_read = read(fd, data, bytes_expected);
if (bytes_read != bytes_expected)
err(EX_DATAERR, "Read only %d of %d bytes",
bytes_read, bytes_expected);
/* print all */
free(data);
exit(EX_OK);
}
You are attempting to read a text file as if the data was binary, so you will read some bytes but the double values stored in the array will not be the values that you wanted to read from the file, you can probably do this
FILE *file;
double *array;
size_t count;
const char *infile = "file.dat";
file = fopen(infile, "r");
if (file == NULL)
return -1;
count = 0;
while (fscanf(file, "%*lf") == 1)
count += 1;
rewind(file);
array = malloc(count * sizeof(*array));
if (array == NULL) {
fprintf(stderr, "cannot allocate %zu bytes!\n", count * sizeof(*array));
fclose(file);
return -1;
}
// Read the values into the array
for (size_t i = 0; i < count; ++i) {
fscanf(file, "%lf", &array[i]);
}
// Print the array
for (size_t i = 0; i < count; ++i) {
fprintf(stdout, "%f\n", array[i]);
}
// Release memory
free(array);
Since you want a fast solution, maybe you have to sacrifice memory.
The faster manner of reading a file is in binary form.
Thus, I would obtain the file size with an efficient method,
then I would allocate memory accordingly,
with the idea of uploading the entire file to memory.
There, since memory reading is faster than file reading,
the data can be quickly read by using sscanf(...).
We can also observe that each floating point number
needs at least 3 characters to be stored in a text file:
1 char for the dot ('.'),
1 char for some digit,
and 1 char for
a space (' ') used to separating a value from its succesor in the
file.
Thus, the file size divided by 3 will be the upper bound for the size of the array of doubles.
#include <stdio.h>
int main(void) {
char *filename = "file.dat";
FILE *F = fopen(filename, "rb");
fseek(F, 0L, SEEK_END);
long int filesize = ftell(F);
rewind(F);
char *data = malloc(filesize+1);
fread(data, filesize, 1, F);
data[filesize] = '\0'; // End of string, just in case
fclose(F);
// The desired data will be stored in array:
double *array = malloc(sizeof(double) * filesize/3);
int ret;
int n; // represents the no chars in a sscanf(...) reading
double *a = array;
while (1) { // Infinite loop...
ret = sscanf(data, " %lg%n", a, &n);
if (ret == EOF) break; // <<---- EXIT POINT of the loop
a++;
data += n;
}
long int array_size = a - array + 1;
}
I'm using qsort to sort an array of i strings of size 256, such as char *arr = malloc(i * 256) -- was actully done with reallocs inside a loop. Each string contains, among text, a number, which I use as the comparison element:
int
cmp(const void *a, const void *b)
{
double atime = get_time((char*)a);
double btime = get_time((char*)b);
return (atime > btime) - (atime < btime);
}
When i is small, it works. With a large i, it fails to sort the array correctly. get_time is working. I was using it with a custom heapsort implementation before, which worked flawlessly.
I added the following to cmp to check what was happening:
fprintf(stderr, "Comparing %f to %f, result: %d.\n", atime, btime, (atime > btime) - (atime < btime));
It seems that all comparisons are correct, but not all comparisons are being made. arr has several strings containing 1.something, however I couldn't find any comparison between numbers greater than 1 in the output. The call to qsort is as follows:
qsort((void*)arr, i-1, MAX_ROW_LEN, cmp);
It's the same parameters I used to pass to my heapsort function, but it doesn't work.
Complete code, and example file (fails to sort).
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#define MAX_ROW_LEN 256
#define MAX_FILENAME_LEN 256
/* Return the start time of the event or -1 if no time. */
static double
get_time(const char *event)
{
if (!event || event[0] == '%')
return -1;
size_t tok = strcspn(event, " ") + 2;
double ans = strtod(event + tok, NULL);
if (!ans)
return -1;
return ans;
}
/*static inline*/ int
cmp(const void *a, const void *b)
{
double atime = get_time((char*)a);
double btime = get_time((char*)b);
return (atime > btime) - (atime < btime);
}
int
main(int argc, char **argv)
{
/* process parameters */
if (argc < 2) {
fprintf(stderr, "Supply a file to sort.\n");
exit(EXIT_FAILURE);
}
if (strlen(argv[1]) > MAX_FILENAME_LEN) {
fprintf(stderr, "Filename too long.\n");
exit(EXIT_FAILURE);
}
/* read the file */
printf("Now processing %s.\n", argv[1]);
FILE *f = fopen(argv[1], "r");
if (!f) {
fprintf(stderr, "Failed to open out. Errno %d.\n", errno);
exit(EXIT_FAILURE);
}
char *trace = malloc(MAX_ROW_LEN);
char *header = malloc(MAX_ROW_LEN);
size_t i = 1, j = 1;
while (fgets(trace + (i-1)*MAX_ROW_LEN, MAX_ROW_LEN, f)) {
/* (if we can't get the time, it's part of the header) */
if (get_time(trace + (i-1)*MAX_ROW_LEN) != -1) {
trace = realloc((void*)trace, (++i)*MAX_ROW_LEN);
} else {
strncpy(header + (j-1)*MAX_ROW_LEN, trace + (i-1)*MAX_ROW_LEN,
MAX_ROW_LEN);
header = realloc((void*)header, (++j)*MAX_ROW_LEN);
}
}
if (!feof(f)) {
fprintf(stderr, "Error reading file. Errno %d.\n", ferror(f));
exit(EXIT_FAILURE);
}
printf("Read %zu lines.\n", i);
fclose(f);
/* write the header */
f = fopen("out_fixed", "w");
if (!f) {
fprintf(stderr, "Failed to open out_fixed. Errno %d.\n", errno);
exit(EXIT_FAILURE);
}
for (size_t k = 0; k < j-1; ++k) {
/* (there is '%' in comments, can't print formatted) */
fputs((void*)(header + k*MAX_ROW_LEN), f);
}
/* sort */
printf("Started sorting.\n");
time_t start = time(NULL);
qsort((void*)trace, i-1, MAX_ROW_LEN, cmp);
printf("Ended sorting, took %fs.\n", difftime(time(NULL), start));
/* write the sorted trace */
printf("Started writting to disk.\n");
start = time(NULL);
for (size_t k = 0; k < i-1; ++k) {
fprintf(f, "%s", trace + k*MAX_ROW_LEN);
}
printf("Took %fs.\n", difftime(time(NULL), start));
/* flush */
printf("Closing file (fflush)\n");
start = time(NULL);
if (fclose(f)) {
fprintf(stderr, "Failed to close out_fixed. Errno %d.\n", errno);
exit(EXIT_FAILURE);
}
printf("Took %fs.\n", difftime(time(NULL), start));
exit(EXIT_SUCCESS);
}
I've tested your code and your example input file and it seems to work fine. In your question you say:
... has several strings containing 1.something, however I couldn't find
any comparison between numbers greater than 1 in the output.
But there are no such lines in your example input file.
Given this example line of your input:
12 0.475183170 rank3 STATE fill_row
This line in get_time is going to skip over any leading digits in your double:
size_t tok = strcspn(event, " ") + 2;
strcspn returns the number of characters that it had to read before finding the "needle" so in this case it will return 2. You then add 2 to that and then use that as a pointer offset into your event string, meaning that you are passing a pointer to .475183170 instead of 0.475183170.
You'd be better off just using strchr here anyway:
char *tok = strchr(event, ' ');
if (!tok) {
return -1;
}
double ans = strtod(tok, NULL);
The subsequent strtod will skip leading whitespace for you, so you don't need to get super fancy.
I am trying to read the size of the 2 files to determine which of the two are smaller, but the second file always comes out to zero and the first size is not even correct, any ideas?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
int main()
{
struct stat buf1;
struct stat buf2;
FILE *fp1, *fp2;
int ch1, ch2;
clock_t elapsed;
char fname1[40], fname2[40];
printf("Enter name of first file:");
fgets(fname1, 40, stdin);
while ( fname1[strlen(fname1) - 1] == '\n')
{
fname1[strlen(fname1) -1] = '\0';
}
printf("Enter name of second file:");
fgets(fname2, 40, stdin);
while ( fname2[strlen(fname2) - 1] == '\n')
{
fname2[strlen(fname2) -1] = '\0';
}
fp1 = fopen(fname1, "r");
if ( fp1 == NULL )
{
printf("Cannot open %s for reading\n", fname1 );
exit(1);
}
fp2 = fopen(fname2, "r");
if (fp2 == NULL)
{
printf("Cannot open %s for reading\n", fname2);
exit(1);
}
//int name1 = fopen(fname1, "r");
//int name2 = fopen(fname2, "r");
stat(fp1, &buf1);
int size1 = buf1.st_size;
stat(fp2, &buf2);
int size2 = buf2.st_size;
printf("Size of file 1: %d\n", size1);
printf("Size of file 2: %d\n", size2);
elapsed = clock(); // get starting time
ch1 = getc(fp1); // read a value from each file
ch2 = getc(fp2);
unsigned long long counter = 0;
unsigned long long total = 0;
while(1) // transform this into a for loop
{
ch1 = getc(fp1);
ch2 = getc(fp2);
if((ch1 ^ ch2) == 0) // try to change this into a for loop?
{
counter++;
}
total++;
if ( ( ch1 == EOF) || ( ch2 == EOF)) // if either file reaches the end, then its over!
{
break; // if either value is EOF
}
}
fclose (fp1); // close files
fclose (fp2);
float percent = (float)counter / (float)total * 100.0f ;
printf("Counter: %u Total: %u\n", counter, total);
printf("Percentage: %.2f%\n", percent);
elapsed = clock() - elapsed; // elapsed time
printf("That took %.4f seconds.\n", (float)elapsed/CLOCKS_PER_SEC);
return 0;
}
Here are the results:
Enter name of first file:air.197901.nc
Enter name of second file:air.197902.nc
Size of file 1: 1340845192
Size of file 2: 0
Counter: 147701939 Total: 1256756880
Percentage: 11.75
That took 105.8533 seconds.
Your code is not even calling fstat. You're calling stat but passing a FILE pointer to it rather than a pathname. You need to either do:
stat(fname1, &buf1);
or:
fstat(fileno(fp1), &buf1);
This mistake should have produced an error (or at least a warning) from the compiler.
Also, you should be checking the return value of stat or fstat.
I am trying to match two words and then print them out e.g 'act' and 'cat' have 'a,'c' and 't' in them so they match. here is my code:
#include <stdio.h>
#include <stdlib.h>
main()
{
FILE *fptr;
char words[100], input[100], store[1000][100]
char ch
int i,j,k,z,b,*ptr;
ptr = &b;
fptr = fopen("d:\\words.txt","r");
if (fptr == NULL)
{
printf("Could not open file");
exit(1);
}
printf("Enter the scrambled word: ");
fflush(stdin);
fgets (input,sizeof(input),stdin);
i = 0;
while (fgets(words,sizeof(words),fptr) != NULL)
{
if (strlen(input) == strlen(words))
{
strcpy(store[i],words);
++i;
}
}
//this is where the problem is:
/*am trying to match the letters in two words, if they don't match then store 1 in b,
if b=0 then print out the word which matched with string 'input'*/
for(z = 0; z < 1000; ++z)
{
b = 0;
for(j = 0; j < strlen(input); ++j)
{
for(k = 0; k < strlen(store[z]); ++k)
{
if(input[j] != store[z][k])
*ptr = 1;
}
}
if(*ptr == 0)
{
printf("Word #%2d is: %s\n", z, store[z]);
}
}
fflush(stdin);
getchar();
}
Please I really need help. Am sorry if I haven't made my question clear.
Sorting the letters in both strings and then comparing them is one of the simpler ways of doing what you require. (assuming you are familiar with sorting)
It may not be the most efficient but I then again, worrying too much about efficiency is usually best left until after you have a working solution and performance metrics.
If you want some more efficient methods to detect if two words are anagrams, check out the link provided by Mats Petersson, Optimizing very often used anagram function
Something like this could also work.. (sorry ugly reading code, very busy with something else)...
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <Windows.h>
#include <string>
#include <list>
#include <map>
#include <sstream>
#include <algorithm>
using namespace std;
map< string, list<string> > items;
int c = 0;
void readFile() {
FILE * f = fopen( "c:\\t\\words.txt", "r" );
fseek(f, 0L, SEEK_END);
int size = ftell(f);
fseek(f, 0L, SEEK_SET);
char * data = (char*)malloc(size);
fread(data, size, 1, f);
string s = string(data);
istringstream reader(s);
while(reader) {
string sub;
reader >> sub;
string original = sub;
sort( sub.begin(), sub.end() );
items[sub].push_back(original);
c++;
}
free(data);
fclose(f);
}
bool check( const string & v ) {
string requestStr = v;
sort( requestStr.begin(), requestStr.end() );
printf("Requested: %s [%s]\n", v.c_str(), requestStr.c_str());
if (items.find(requestStr) == items.end()) {
printf("Not found\n");
return false;
}
list<string>::iterator it = items[requestStr].begin();
while (it != items[requestStr].end()) {
printf("Found: %s\n", (*it).c_str());
it++;
}
}
int main(int argc, char ** argv) {
long t1 = GetTickCount();
readFile();
printf("Read wordlist (%i): %li ms\n", c, GetTickCount() - t1 );
string str = "holiday";
t1 = GetTickCount();
check(str);
printf("Time: %li ms\n", GetTickCount() - t1 );
str = "tac";
t1 = GetTickCount();
check(str);
printf("Time: %li ms\n", GetTickCount() - t1 );
str = "dfgegs";
t1 = GetTickCount();
check(str);
printf("Time: %li ms\n", GetTickCount() - t1 );
}
results on 109000 words file
Read wordlist (109583): 5969 ms
Requested: holiday [adhiloy]
Found: holiday
Time: 0 ms
Requested: tac [act]
Found: act
Found: cat
Time: 0 ms
Requested: dfgegs [defggs]
Not found
Time: 0 ms
120000 searches takes 7188ms, so around 0.0599ms per search...