Related
I am writing a C program which allows the user to dynamically specify the name of the file from which the data is to be read. Next, the user enters a lower bound and an upper bound. The lines between those bounds are to be printed.
For this the main function makes a call: readValues(cTargetName, iLower, iHiger);
The function readValues is supposed to work as follows:
Check if the file exists; if yes, open it with fopen
Read with feof and fgets line by line the whole file, and store each line in char string
With a for loop, print the correct range of lines from the string
I'm not sure why but the while loop doesn't seem to exit although I use the feof statement, which should terminate after the end of the File is reached.
The code looks as follows:
#include <stdio.h>
#include <stdlib.h>
/*
 * Opens cFileName, reads it line by line into strArray, then prints
 * strArray[n] through strArray[m] (both used directly as row indexes).
 */
void readValues(char cFileName[75], int n, int m)
{
//Variable declaration;
// NOTE(review): each row holds 50 bytes, but fgets below is told it may store 200.
char strArray[50][50];
// NOTE(review): parser is never used in this function.
char *parser;
int i = 0;
FILE *Data;
// NOTE(review): == binds tighter than =, so Data receives the result of the
// comparison (0 or 1) rather than the FILE pointer returned by fopen.
if(Data = fopen(cFileName, "rt") == NULL){
printf("File could not be opened");
return 1; //Can you return 1 in a void function?
}
//Read the file line by line
// NOTE(review): the return value of fgets is not checked and i is never
// compared against the 50 available rows.
while(feof(Data)==0){
fgets(strArray[i], 200, Data);
i++;
}
//Reading the specified lines
// Rows n..m are printed regardless of how many rows were actually filled (i).
for(n; n<=m; n++){
printf("%s", strArray[n]);
}
// NOTE(review): Data is not closed before returning.
}
/* Prints a fixed range of lines from a hard-coded file via readValues(). */
int main()
{
    char cPath[75] = {"C:/Users/User1/Desktop/C_Projects_1/TestData.txt"};
    int iFirst = 2, iLast = 4;

    readValues(cPath, iFirst, iLast);
    return 0;
}
All help is appreciated. Thanks in advance!
Here is my solution to your question:
#include <stdio.h>
#include <stdlib.h>
#define MIN_LINE_LENGTH 64
/* Minimal boolean type: false has the value 0 and true the value 1. */
typedef enum {
    false = 0,
    true = 1
} bool;
/*
 * Asks for a file name and a "start end" pair of 1-based line numbers, loads
 * the whole file into a dynamically grown array of lines, and prints the
 * requested range (end is clamped to the number of lines in the file).
 *
 * Returns 0 on success and 1 on any error (bad input, unreadable file, start
 * line past the end of the file, or out of memory).
 */
int main() {
    /* FILENAME_MAX comes from <stdio.h>, which this program already includes. */
    char filename[FILENAME_MAX] = {0};
    printf("Enter filename:\n");
    if (fgets(filename, sizeof filename, stdin) == NULL) { // get filename from stdin
        fprintf(stderr, "Error reading filename.\n");
        return 1;
    }
    for (char *ptr = filename; *ptr; ++ptr) { // fgets() keeps the newline: cut the name there
        if (*ptr == '\n') {
            *ptr = 0;
            break;
        }
    }

    printf("Enter starting line and end line, separated by a space:\n");
    size_t startLine = 0;
    size_t endLine = 0;
    bool hasFirstNum = false;
    bool hasSecondNum = false;
    bool hasMiddleSpace = false;
    bool hasLastSpace = false;
    size_t numCount = 0; // which number (1st, 2nd, ...) the current digit belongs to
    int ch;
    while ((ch = fgetc(stdin)) != EOF && ch != '\n') { // continually receive chars from stdin
        if (ch != ' ' && !(ch >= '0' && ch <= '9')) { // if not a space or digit, raise error
            fprintf(stderr, "Only numerical values (and spaces) can be entered.\n");
            return 1;
        }
        if (ch == ' ') {
            if (hasFirstNum) {
                hasMiddleSpace = true;
            }
            if (hasSecondNum) {
                hasLastSpace = true;
            }
            continue;
        }
        if (!hasFirstNum) {
            ++numCount;
            hasFirstNum = true;
        }
        else if (!hasSecondNum && hasMiddleSpace) {
            ++numCount;
            hasSecondNum = true;
        }
        else if (hasLastSpace) {
            ++numCount; // a third number starts: handled by the break below
        }
        if (numCount == 1) {
            startLine = startLine * 10 + (size_t)(ch - '0');
        }
        else if (numCount == 2) {
            endLine = endLine * 10 + (size_t)(ch - '0');
        }
        else {
            break; // anything after the second number is ignored
        }
    }

    FILE *fp = fopen(filename, "r");
    if (fp == NULL) {
        fprintf(stderr, "Error opening file.\n");
        return 1;
    }

    int status = 1;
    int c;
    size_t line_count = 0;              // number of valid entries in lines[]
    size_t char_count = 0;              // characters stored in the current line
    size_t capacity = MIN_LINE_LENGTH;  // bytes allocated for the current line
    char *line = NULL;
    char **lines = malloc(sizeof *lines);
    if (lines == NULL) {
        goto out_of_memory;
    }
    line = malloc(capacity);
    if (line == NULL) {
        goto out_of_memory;
    }
    lines[line_count++] = line;

    while ((c = fgetc(fp)) != EOF) { // continually get chars from file stream
        if (c == '\n') { // terminate this line and start a new one
            line[char_count] = 0;
            char **grown = realloc(lines, (line_count + 1) * sizeof *lines);
            if (grown == NULL) {
                goto out_of_memory;
            }
            lines = grown;
            line = malloc(MIN_LINE_LENGTH);
            if (line == NULL) {
                goto out_of_memory;
            }
            lines[line_count++] = line;
            capacity = MIN_LINE_LENGTH;
            char_count = 0;
            continue;
        }
        if (char_count + 1 >= capacity) { // keep one byte free for the terminator
            char *bigger = realloc(line, capacity + MIN_LINE_LENGTH);
            if (bigger == NULL) {
                goto out_of_memory;
            }
            line = bigger;
            // realloc may move the block, so the entry in lines[] must follow it
            lines[line_count - 1] = line;
            capacity += MIN_LINE_LENGTH;
        }
        line[char_count++] = (char)c;
    }
    line[char_count] = 0; // to ensure the last line always ends with the null byte

    // A file that ends in '\n' leaves an empty extra entry that is not a real line.
    if (char_count == 0 && line_count > 1) {
        free(lines[--line_count]);
    }

    if (startLine > line_count) { // starting line is past the end of the document
        fprintf(stderr, "Specified starting line is greater than total lines in document.\n");
        goto cleanup;
    }
    if (endLine > line_count) { // adjust ending line if it is greater than number of lines in doc.
        endLine = line_count;
    }
    if (startLine == 0) { // we will be using the starting index of 1 as the first line
        startLine = 1;
    }
    for (size_t n = startLine; n <= endLine; ++n) { // print lines
        printf("%s\n", lines[n - 1]);
    }
    status = 0;
    goto cleanup;

out_of_memory:
    fprintf(stderr, "Out of memory.\n");
cleanup:
    for (size_t i = 0; i < line_count; ++i) { // free all memory
        free(lines[i]);
    }
    free(lines);
    fclose(fp);
    return status;
}
It is a little more convoluted, but because it uses dynamic memory allocation, it can handle lines of any length within a text file.
If there is anything unclear, please let me know and I would be happy to explain.
Hope this helps!!
several issues here,
first, you limited the length of lines to 200, not exactly what you might expect to get.
the fgets function returns lines up to specified length unless hit by newline character - this should be taken into account.
additionally, fgets returns NULL if you hit EOF - no real need to use feof.
second, you could save yourself a lot of pain and simply count the number of times you get a string, and for the times you are within the range just print it immediately. will save you a nice amount of overhead
like this:
#include <stdio.h>
#include <stdlib.h>
#define MAXLINE 200//or anything else you want
/*
 * Print lines n..m (1-based, inclusive) of the text file cFileName to stdout.
 *
 * The file is streamed character by character, so nothing is stored and lines
 * of any length are counted correctly. If the file cannot be opened a message
 * is printed and the function returns without printing any lines.
 */
void readValues(char cFileName[75], int n, int m)
{
    FILE *Data = fopen(cFileName, "rt");
    if (Data == NULL) {
        printf("File could not be opened");
        return; // a void function cannot return a value, so a bare return is used
    }

    int lineNo = 1; // number of the line the next character belongs to
    int ch;
    // Stop reading as soon as the last requested line has been passed.
    while (lineNo <= m && (ch = fgetc(Data)) != EOF) {
        if (lineNo >= n) {
            putchar(ch);
        }
        if (ch == '\n') {
            ++lineNo;
        }
    }
    fclose(Data);
}
I recently started at university with C programming (beginner course), and now we are doing our final examination which is about a patients' database.
I'm required to read data from a text file into a struct array (size 10000). Each record contains two strings (a personal identification string of 10 digits separated by a '-', and a name string), one int array containing photo references, and one integer holding the number of photo references for that patient. I have tried fscanf, but the program just hangs whenever I try to read; when I use fgets, it reads the whole line and stores the integers from the photo reference array in my name array (the middle field). I am wondering how I should go about doing this. I've spent days trying to figure out a solution, but nothing seems to work. This is what my text file looks like:
123456-1234 Name Name [1, 2, 3, 4]
234567-2345 Name2 Name2 [1, 2]
345678-3456 Name3 Name3 []
And this is my write_to_file function which writes to the file when the program exits:
/*
 * Write the first *pNr_of_patients records of reg to "file.txt", one per line:
 *
 *   <pers_nr> TAB <name> TAB [ref, ref, ...]
 *
 * The number of references written for each patient is taken from nr_of_ref,
 * so a patient without references is written as "[]" and a reference whose
 * value is 0 is written like any other. The file is created or truncated even
 * when there are no patients. If it cannot be opened a message is printed and
 * nothing is written.
 */
void write_to_file(Patient reg[], int *pNr_of_patients){
    FILE *fp=fopen("file.txt","w");
    if(fp==NULL){
        printf("File could not be opened for writing\n");
        return;
    }
    for(int i=0;i<*pNr_of_patients;i++){
        fprintf(fp,"%s\t%s\t[",reg[i].pers_nr,reg[i].name);
        for(int j=0;j<reg[i].nr_of_ref;j++){
            if(j>0){
                fprintf(fp,", "); /* separator goes between references only */
            }
            fprintf(fp,"%d",reg[i].photo_ref[j]);
        }
        fprintf(fp,"]\n");
    }
    fclose(fp);
}
This is my read_from_file function, it's missing code for reading the int array values at the end:
Edit: I added a for loop to remove the characters starting at "[" from the name string, now i just need to know how to read the array values at the end into the struct's photo reference array.
/*
 * Reads "file.txt" and fills reg[] starting at index *pNr_of_patients,
 * incrementing *pNr_of_patients once per record read. Only pers_nr and name
 * are filled in; the photo references are not parsed.
 */
void read_from_file(Patient reg[],int *pNr_of_patients){
FILE *fp;
fp=fopen("file.txt","r");
if(fp!=NULL){
// nr_of_ref is cleared only for the slot that is current before the loop starts.
reg[*pNr_of_patients].nr_of_ref=0;
// NOTE(review): i is never used.
int i=0, pos;
// NOTE(review): fgets may store up to 13 bytes here, while pers_nr is declared
// as char[12] in this file's Patient struct.
while(fgets(reg[*pNr_of_patients].pers_nr,13,fp)!=NULL){
// Overwrites the last character read (the separator after the number).
reg[*pNr_of_patients].pers_nr[strlen(reg[*pNr_of_patients].pers_nr)-1]='\0';
// NOTE(review): up to 31 bytes are stored into name, which is declared as char[30].
fgets(reg[*pNr_of_patients].name,31,fp);
reg[*pNr_of_patients].name[strlen(reg[*pNr_of_patients].name)-1]='\0';
// Cut the name at every '[' so the bracketed reference list is not part of it.
for(pos=0;pos<30;pos++){
if(reg[*pNr_of_patients].name[pos]=='['){
reg[*pNr_of_patients].name[pos]='\0';
}
}
(*pNr_of_patients)++;
}
fclose(fp);
}else{
printf("File does not exist\n");
}
}
This is what my Patient struct looks like:
/* One record of the patient register. */
typedef struct patient {
    char pers_nr[12];   /* personal identification string */
    char name[30];      /* patient name */
    int photo_ref[10];  /* photo references */
    int nr_of_ref;      /* number of photo references */
} Patient;
Calling read_from_file in main:
/* Loads the register from file, runs the database menu, then saves it again. */
int main(void){
    /* static storage: 10000 records are too large to put safely on the stack,
       and static objects start out zero-initialized */
    static Patient patient_register[10000];
    int nr_of_patients=0;
    read_from_file(patient_register,&nr_of_patients);
    database_management(patient_register,&nr_of_patients); //this is where I fill all the data into the array before writing to the file at the end
    write_to_file(patient_register,&nr_of_patients);
    return 0;
}
I think that scanning input is one of the hardest in C. That's why libraries like cs50 exists, to ease up reading input for new C users. Anyway, I constructed my solution, but I redesigned your function.
The first solution reads a single Patient from a line. It does not use sscanf; the only standard call in it that sets errno is strtol, which is used to convert the numbers.
The second function uses sscanf and some crazy format string construction to stay safe of buffer overflow.
It all comes down to how the input stream is constructed and how much you trust it.
#include <stdio.h>
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <limits.h>
/* One patient record as parsed from a single input line. */
typedef struct patient {
    char pers_nr[12];   /* personal number text plus terminator */
    char name[30];      /* name text plus terminator */
    int photo_ref[10];  /* photo references */
    size_t nr_of_ref;   /* number of photo references */
} Patient;
/*
 * Parse one line of the form
 *
 *   DDDDDD-DDDD First Last [n, n, ...]
 *
 * into *p without using sscanf.
 *
 * line  NUL-terminated text; one trailing '\n' is accepted.
 * p     record to fill. It may be partially written when parsing fails.
 *
 * Returns 0 on success. On failure returns a negative number (the negated
 * source line of the failing check), so callers should only test for < 0.
 * An empty list "[]" is valid and gives nr_of_ref == 0.
 */
int patient_read_from_line_1(const char line[], Patient *p)
{
    assert(line != NULL);
    assert(p != NULL);

    // check the first 12 characters ----------
    // first 6 chars must be digits (ctype functions need an unsigned char value)
    for (int i = 0; i < 6; ++i) {
        if (!isdigit((unsigned char)line[i])) {
            return -__LINE__;
        }
    }
    // followed by a single '-'
    if (line[6] != '-') {
        return -__LINE__;
    }
    // followed by 4 digits
    for (int i = 7; i < 7 + 4; ++i) {
        if (!isdigit((unsigned char)line[i])) {
            return -__LINE__;
        }
    }
    // followed by a space
    if (line[7 + 4] != ' ') {
        return -__LINE__;
    }

    // read up first field ---------------------
    memcpy(p->pers_nr, line, 11);
    p->pers_nr[11] = '\0';
    line += 12;
    // let's omit spaces
    while (line[0] == ' ') {
        line++;
    }

    // read up second field --------------------------
    // the name is two words separated by one space, so it ends at the second space
    if (!isalpha((unsigned char)*line)) {
        return -__LINE__;
    }
    const char *pnt_first_space = strchr(line, ' ');
    if (pnt_first_space == NULL) {
        return -__LINE__;
    }
    const char *pnt_another_space = strchr(pnt_first_space + 1, ' ');
    if (pnt_another_space == NULL) {
        return -__LINE__;
    }
    const size_t name_to_read_length = (size_t)(pnt_another_space - line);
    // >= because one byte of p->name is needed for the terminator
    if (name_to_read_length >= sizeof(p->name)) {
        return -__LINE__;
    }
    memcpy(p->name, line, name_to_read_length);
    p->name[name_to_read_length] = '\0';
    line += name_to_read_length;
    // let's omit the spaces
    while (line[0] == ' ') {
        line++;
    }

    // read up array -----------------------------------
    if (line[0] != '[') {
        return -__LINE__;
    }
    line++;
    while (*line == ' ') line++;

    size_t numscnt = 0;
    if (*line == ']') {
        // empty list: nothing to convert
        ++line;
    } else {
        for (;;) {
            if (numscnt >= sizeof(p->photo_ref)/sizeof(*p->photo_ref)) {
                return -__LINE__;
            }
            char *pnt;
            errno = 0;
            long num = strtol(line, &pnt, 10);
            if (pnt == line) {
                // strtol consumed nothing, so there is no number here
                return -__LINE__;
            }
            if (errno) {
                return -__LINE__;
            }
            if (num < INT_MIN || num > INT_MAX) {
                return -__LINE__;
            }
            p->photo_ref[numscnt++] = (int)num;
            line = pnt;
            // omit spaces
            while (*line == ' ') line++;
            // a ']' closes the list, a ',' announces another number
            if (line[0] == ']') {
                ++line;
                break;
            }
            if (line[0] != ',') {
                return -__LINE__;
            }
            ++line;
            // omit spaces
            while (*line == ' ') line++;
        }
    }
    p->nr_of_ref = numscnt;

    // this needs to be end of line or newline
    if (line[0] != '\0' && line[0] != '\n') {
        return -__LINE__;
    }
    // success!
    return 0;
}
// ok, ok, ok, let's use sscanf
/*
 * sscanf-based variant: parse one line of the form
 *
 *   <pers_nr> First Last [n, n, ...]
 *
 * into *p.
 *
 * line  NUL-terminated text; one trailing '\n' is accepted.
 * p     record to fill. It may be partially written when parsing fails.
 *
 * Returns 0 on success. On failure returns a negative number (the negated
 * source line of the failing check), so callers should only test for < 0.
 * An empty list "[]" is valid and gives nr_of_ref == 0.
 */
int patient_read_from_line_2(const char line[], Patient *p)
{
    assert(line != NULL);
    assert(p != NULL);
    int ret;
    int pos = -1;

    // read up first field and half of the second ------------------
    // The widths are one less than the buffers (pers_nr[12], name[30]) because
    // scanf stores a terminating NUL in addition to the characters it counts.
    ret = sscanf(line, "%11s %29[^ ] %n", p->pers_nr, p->name, &pos);
    if (ret != 2 || pos < 0) {
        return -__LINE__;
    }
    line += pos;

    // read up another half of the second field -------------------
    const size_t cur_name_len = strlen(p->name);
    // room is needed for the joining ' ', at least one character, and the NUL
    if (cur_name_len + 2 >= sizeof(p->name)) {
        return -__LINE__;
    }
    p->name[cur_name_len] = ' ';
    p->name[cur_name_len + 1] = '\0'; // keep name terminated if the scan below fails
    char tmp[20];
    ret = snprintf(tmp, sizeof tmp, "%%%d[^ ] [%%n", (int)(sizeof(p->name) - cur_name_len - 2));
    if (ret < 0 || (size_t)ret >= sizeof tmp) {
        return -__LINE__;
    }
    pos = -1; // %n is not stored when the '[' fails to match
    ret = sscanf(line, tmp, &p->name[cur_name_len + 1], &pos);
    if (ret != 1 || pos < 0) {
        return -__LINE__;
    }
    line += pos;

    // read up array -------------------------------------------
    p->nr_of_ref = 0;
    while (*line == ' ') line++;
    if (*line == ']') {
        // empty numbers list
        line++;
    } else {
        for (;;) {
            if (p->nr_of_ref >= sizeof(p->photo_ref)/sizeof(*p->photo_ref)) {
                return -__LINE__;
            }
            pos = -1;
            // one number followed by one non-space character (',' or ']')
            ret = sscanf(line, " %d%1s%n", &p->photo_ref[p->nr_of_ref], tmp, &pos);
            if (ret != 2 || pos < 0) {
                return -__LINE__;
            }
            p->nr_of_ref++;
            line += pos;
            if (tmp[0] == ']') {
                break;
            }
            if (tmp[0] != ',') {
                return -__LINE__;
            }
        }
    }

    // so what's left? - end of string or newline
    if (line[0] != '\0' && line[0] != '\n') {
        return -__LINE__;
    }
    // success!
    return 0;
}
/*
 * Read patients from fp, one per line, into patients[0 .. patients_len-1].
 *
 * Returns the number of patients read, or a negative value when a line fails
 * to parse or when the stream holds more lines than patients_len.
 */
long patient_read_from_file(FILE *fp, Patient patients[], size_t patients_len)
{
    size_t patients_cnt = 0;
    char line[256];
    // for each line in file
    while (fgets(line, sizeof(line), fp) != NULL) {
        // check for room BEFORE parsing, so no slot past the array is written
        if (patients_cnt >= patients_len) {
            // no more memory in patients left
            return -__LINE__;
        }
        const int ret = patient_read_from_line_2(line, &patients[patients_cnt]);
        if (ret < 0) {
            // handle reading error
            return ret;
        }
        patients_cnt++;
    }
    return (long)patients_cnt;
}
/*
 * Print one patient to f as "<pers_nr> <name> [a,b,c]" followed by a newline.
 * The references are separated by commas with no trailing comma.
 */
void patient_fprintln(FILE *f, const Patient *p)
{
    fprintf(f, "%s %s [", p->pers_nr, p->name);
    for (size_t idx = 0; idx < p->nr_of_ref; ++idx) {
        if (idx > 0) {
            fprintf(f, ",");
        }
        fprintf(f, "%d", p->photo_ref[idx]);
    }
    fprintf(f, "]\n");
}
/*
 * Reads up to three patients from standard input and prints them back.
 * Exits with EXIT_FAILURE when the input cannot be parsed.
 */
int main()
{
    FILE *fp;
    fp = stdin; // fopen("file.txt","r");
    if (fp == NULL) {
        return EXIT_FAILURE;
    }
    Patient patients[3];
    const long patients_cnt = patient_read_from_file(fp, patients, sizeof(patients)/sizeof(*patients));
    fclose(fp); // done with the stream on both the success and the error path
    if (patients_cnt < 0) {
        fprintf(stderr, "patient_read_from_file error %ld\n", patients_cnt);
        return EXIT_FAILURE;
    }
    // patients_cnt is a long, so it needs %ld
    printf("Read %ld patients:\n", patients_cnt);
    for (size_t i = 0; i < (size_t)patients_cnt; ++i) {
        patient_fprintln(stdout, &patients[i]);
    }
    return 0;
}
Live version available at onlinedbg.
This can certainly be simplified, and it certainly has bugs. It is just meant to show which functions (strtol, memcpy, sscanf, isdigit, isalpha) people sometimes use to read input. I also specify a length modifier for scanf (sscanf(..., "%12s")) to handle overflows (hopefully). Try to always check the return values of scanf and other standard functions (checking the snprintf return value is maybe a little too much, but hey, let's be consistent). Be wary: on some platforms the %n scanf modifier happens not to work. This can also be built up to use dynamic allocation with malloc, realloc and free, for reading lines (which is basically equal to writing a custom version of GNU getline), for reading strings from the input, for reading the int array from the input, and for allocating the patients dynamically.
This was meant as a comment but got too long, so I type it here.
read_from_file() appears overly complex. You might consider revisiting fscanf, reading the photo references as a whole string and then parsing into integers which you can assign to the photo_ref array. (While the code below might compile, I haven't verified that it works. It's just an idea of how one might proceed.)
/*
 * Read "file.txt" into reg[], one record per line of the form
 *
 *   <pers_nr> <name> [ref, ref, ...]
 *
 * and store the number of records read in *pNr_of_patients.
 *
 * For every record pers_nr, name (with trailing blanks removed), photo_ref
 * and nr_of_ref are filled in. Reading stops at end of file or at the first
 * record that does not match the format. The scan widths assume the buffer
 * sizes declared for Patient in this file (pers_nr[12], name[30]).
 *
 * NOTE(review): the capacity of reg[] is not passed in, so the number of
 * records cannot be bounded here - the caller must provide enough room.
 */
void read_from_file (Patient reg[], int *pNr_of_patients)
{
  FILE *fp = fopen ("file.txt", "r");
  if (fp == NULL)
    {
      printf ("File does not exist\n");
      return;
    }

  char refs[64];                /* "[" followed by the list text, without "]" */
  int n;
  *pNr_of_patients = 0;
  /* Fewer than 2 converted fields means end of file or a malformed record;
     either way another fscanf call would make no progress, so stop. */
  while ((n = fscanf (fp, "%11s %29[^[]%63[^]]]",
                      reg[*pNr_of_patients].pers_nr,
                      reg[*pNr_of_patients].name, refs)) >= 2)
    {
      Patient *cur = &reg[*pNr_of_patients];
      const int max_refs = (int) (sizeof cur->photo_ref / sizeof cur->photo_ref[0]);
      int count = 0;            /* references stored for THIS patient */

      /* the name was read up to '[', so it ends in the separating blank(s) */
      size_t len = 0;
      while (cur->name[len] != '\0')
        len++;
      while (len > 0 && (cur->name[len - 1] == ' ' || cur->name[len - 1] == '\t'))
        cur->name[--len] = '\0';

      if (n > 2)
        {                       /* found photo refs. Now split the string into integers */
          const char *s = refs + 1;     /* skip '[' */
          while (count < max_refs)
            {
              char *end;
              long value = strtol (s, &end, 10);        /* skips leading blanks itself */
              if (end == s)
                break;          /* no number left (also covers the empty list) */
              cur->photo_ref[count++] = (int) value;
              s = end;
              while (*s == ' ')
                s++;
              if (*s != ',')
                break;          /* end of the list text */
              s++;              /* skip ',' */
            }
        }
      cur->nr_of_ref = count;
      (*pNr_of_patients)++;
    }
  fclose (fp);
}
There are some good answers already, but most of them try to use a single method to parse all elements of the line. I would read whole lines into a buffer first, then use sscanf() to parse the patient number and name, but use strtok() to split the array into its individual components:
/*
 * Read "file.txt" line by line into reg[] and store the number of records in
 * *pNr_of_patients (0 if the file cannot be opened).
 *
 * sscanf() extracts the patient number and the name; the bracketed list that
 * follows is split on ',' with strtok() and each piece is converted with
 * strtol(). Lines that do not match the expected layout are skipped.
 *
 * NOTE(review): the capacity of reg[] is not known here, so the number of
 * records is not bounded - the caller must provide enough room.
 */
void read_from_file(Patient reg[], int *pNr_of_patients) {
    FILE *fp = fopen("file.txt", "r");
    if (!fp) {
        fprintf(stderr, "Error opening file: %s\n", strerror(errno));
        *pNr_of_patients = 0;
        return;
    }
    char line[1024];
    int i = 0;
    while (fgets(line, sizeof line, fp)) {
        int offset = -1; // stays -1 unless the '[' was reached and %n was stored
        if (sscanf(line, "%11s %29[^[] [%n", reg[i].pers_nr, reg[i].name, &offset) != 2
            || offset < 0) {
            continue; // malformed line: do not consume a slot
        }
        int refs = 0;
        for (char *tok = strtok(line + offset, ","); tok && refs < 10; tok = strtok(NULL, ",")) {
            char *end;
            long value = strtol(tok, &end, 10);
            if (end != tok) // a token such as "]" holds no number and is ignored
                reg[i].photo_ref[refs++] = (int)value;
        }
        reg[i].nr_of_ref = refs;
        i++;
    }
    fclose(fp);
    *pNr_of_patients = i;
}
Divide and Conquer
Break this down into steps. Make a function that populates 1 Patient.
The code below is untested. Consider it a starting point. The design goal is to make a function that reads 1 line into 1 Patient.
Read in 1 entire line
// Reads one line from stream and parses it into a Patient.
// return 1: success, 0: failure EOF:end-of-file
int read_once_from_file(FILE *stream, Patient *pat_ptr) {
// Parse into a zeroed local copy; *pat_ptr is only written once parsing succeeded.
Patient pat = { 0 };
char buffer[100 + 30*13];
// fgets returns NULL at end-of-file (or on a read error): report that as EOF.
if (fgets(buffer, sizeof buffer, stream) == NULL) {
return EOF;
}
Parse the first part. Use "%n" which records the parsing offset. Use width limits on string input.
// n receives the offset just past the '[' via %n; it stays 0 when the '[' is not reached.
int n = 0;
if (sscanf(buffer, " %11[^\t] %29[^\t] [ %n", pat.pers_nr, pat.name, &n) != 2 || n == 0) {
return 0; // improper formatted input
}
char *p = buffer + n;
Now look for ']' and photo_ref
// An immediate ']' means an empty list; otherwise read "number," pairs until ']'.
if (*p != ']') {
for (pat.nr_of_ref=0; ; pat.nr_of_ref++) {
// nr_of_ref doubles as the index of the slot being filled
if (sscanf(p, "%d %n", &pat.photo_ref[pat.nr_of_ref], &n) != 1) {
return 0; // improper formatted input
}
p += n;
if (*p == ']') {
pat.nr_of_ref++;
break;
}
// a ',' promises another number, which must still fit into photo_ref[10]
if (*p != ',' || pat.nr_of_ref + 1 == 10) {
return 0; // improper formatted input
}
p++;
}
}
Save result
// Parsing succeeded: copy the finished record to the caller in one assignment.
*pat_ptr = pat;
return 1;
}
Call read_once_from_file() as needed
/*
 * Fill reg[] from "file.txt" by calling read_once_from_file() once per line,
 * for at most 10000 records. *pNr_of_patients receives the number of records
 * read before end-of-file or the first malformed line; it is 0 when the file
 * cannot be opened.
 */
void read_from_file(Patient reg[],int *pNr_of_patients){
    *pNr_of_patients = 0;
    FILE *fp = fopen("file.txt","r");
    if(fp == NULL){
        return;
    }
    int i; /* declared outside the loop because it is needed after it */
    for (i = 0; i<10000; i++) {
        int count = read_once_from_file(fp, &reg[i]);
        if (count == EOF) {
            break;
        }
        if (count != 1) {
            // error
            fprintf(stderr, "Input error\n");
            break;
        }
    }
    *pNr_of_patients = i;
    fclose(fp);
}
I am trying to read a specific line from a file using the code below.
/*
 * Opens filename and scans lnum lines into a freshly allocated 2048-byte
 * buffer, each scan overwriting the previous one, then returns the buffer.
 * Returns NULL if the file cannot be opened or fscanf reports EOF first.
 * The returned buffer comes from malloc and is not freed here.
 */
char *getlinenum(char *filename, int lnum)
{
FILE *f;
int i;
// NOTE(review): tmp is only referenced by the commented-out code below.
char *linebuf = NULL, *tmp = NULL;
if ((f = fopen(filename, "r")) != NULL)
{
// NOTE(review): the result of malloc is used without a NULL check.
linebuf = (char *)malloc(2048);
memset(linebuf, 0, 2048);
// With lnum <= 0 the loop body never runs and the zero-filled buffer is returned.
for (i = 0; i < lnum; i++)
{
// NOTE(review): %[^\n] has no field width, so a long line can write past the
// 2048 bytes. Only EOF is treated as failure; a return value of 0 (nothing
// matched) is not.
if (fscanf(f, "%[^\n]\n", linebuf) == EOF)
{
free(linebuf);
fclose(f);
printf("Returning NULL\n");
return NULL;
}
}
//tmp = strdup(linebuf);
//free(linebuf);
fclose(f);
return linebuf;
}
return NULL;
}
No matter what, this is always returning just an empty (zero) string. Any issues you see? Here is the test file:
/home/mainframe/b
/home/mainframe/dead.letter
/home/mainframe/.bash_history
/home/mainframe/a
/home/mainframe/f
/home/mainframe/e
/home/mainframe/c
/home/mainframe/g
/home/mainframe/.ssh/authorized_keys
/home/mainframe/.ssh
/home/mainframe/d
I don't really understand where it could end up with a zero string (not a nullpointer).
This code is working for me (I made no changes except removing the unnecessary tmp variable).
One issue is if 0 is passed, the for loop never enters. Just change it to <= and/or add another if-statement at the beginning:
if( lnum <= 0 )
return NULL;
to catch this issue.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Return a copy of line number lnum (1-based) of the text file filename.
 *
 * Every physical line counts, including empty ones. The trailing newline is
 * removed. Lines longer than 2047 characters are truncated to their first
 * 2047 characters.
 *
 * Returns a malloc'ed, NUL-terminated string that the caller must free(), or
 * NULL when lnum < 1, the file cannot be opened, memory is exhausted, or the
 * file has fewer than lnum lines (in that last case "Returning NULL" is
 * printed as well).
 */
char *getlinenum(char *filename, int lnum)
{
    enum { LINE_CAP = 2048 };
    FILE *f;
    char *linebuf;
    int lines_read = 0;

    if( lnum <= 0 )
        return NULL;
    if ((f = fopen(filename, "r")) == NULL)
        return NULL;
    linebuf = malloc(LINE_CAP);
    if (linebuf == NULL)
    {
        fclose(f);
        return NULL;
    }

    while (lines_read < lnum && fgets(linebuf, LINE_CAP, f) != NULL)
    {
        size_t len = strlen(linebuf);
        if (len > 0 && linebuf[len - 1] == '\n')
        {
            linebuf[len - 1] = '\0';
        }
        else
        {
            /* No newline stored: either the line is longer than the buffer or
               it is the last line of the file. Drop the rest so that the next
               fgets starts on a fresh line. */
            int ch;
            while ((ch = fgetc(f)) != EOF && ch != '\n')
                ;
        }
        lines_read++;
    }
    fclose(f);

    if (lines_read < lnum)
    {
        free(linebuf);
        printf("Returning NULL\n");
        return NULL;
    }
    /* ownership passes to the caller, so the buffer must NOT be freed here */
    return linebuf;
}
/* Prints line 2 of input.txt, or reports that it could not be read. */
int main()
{
    char *line = getlinenum("input.txt", 2);
    if (line == NULL)
    {
        /* passing NULL to %s is undefined behaviour, so report the failure instead */
        fprintf(stderr, "Could not read the requested line\n");
        return 1;
    }
    printf("%s\n", line);
    return 0;
}
Output:
/home/mainframe/dead.letter
A fairly obvious problem not mentioned yet is that this code overflows the buffer if there is a line longer than 2048.
Another problem is that your fscanf string will skip blank lines (except for the first line of the file). I'm not sure if this was intentional. The \n matcher that you have on the end of the string means to match all whitespace up till the next non-whitespace even if that whitespace includes multiple newlines.
To fix that problem, you could remove that \n and just do a fgetc() after each fscanf to consume one newline.
To fix the buffer overflow I would recommend skipping up to the line you want without storing anything, and then using a fgets to get the line you are interested in. For example (here I also have factored out the cleanup code):
// Reject line numbers below 1 before trying to open the file.
if (lnum < 1 || (f = fopen(filename, "r")) == NULL)
return NULL;
char *buffer = NULL;
// Skip lnum-1 lines without storing them: %*[^\n] discards everything up to
// the newline and fgetc then consumes the newline itself.
for ( ; lnum > 1; --lnum )
{
if ( fscanf(f, "%*[^\n]") == EOF || fgetc(f) == EOF )
break;
}
// lnum is still above 1 here if the file ended before the wanted line.
if ( lnum == 1 )
{
// or use the POSIX getline() function or similar, to avoid any size limitation and
// avoid the mucking around with fgets and \n
// NOTE(review): the result of calloc is passed to fgets without a NULL check.
buffer = calloc(1, 2048);
if ( ! fgets(buffer, 2048, f) )
{
free(buffer);
buffer = NULL;
}
// Strip the trailing newline that fgets keeps.
else if ( buffer[0] && buffer[strlen(buffer)-1] == '\n' )
buffer[strlen(buffer)-1] = 0;
}
// Single exit path: the file is closed whether or not a line was found.
fclose(f);
return buffer;
Also, using unsigned long long for line_num would let you read more!
I have run into a problem with a C program: an int variable changes unexpectedly.
Below is all about the problem:
I try to read a txt file which looks like:
2013/12/31 19:53:54, started, /activeJob/start/ Failed
2013/12/31 19:55:55, ended, retCode = 6, Couldn't resolve host name, /activeJob/finish/ Failed
2014/01/01 08:06:55, started, /activeJob/start/ Failed
2014/03/04 12:16:55, started, /activeJob/start/ Success
2014/03/04 12:17:25, ended, retCode = 0, No error, /activeJob/finish/ success
2014/03/04 13:57:21, started, /activeJob/start/ Success
It is a log file which will record the start/finish time of a task. I want to parse the log file and find the finished task record in a order time(latest first). For example, I will try to read the last line and it shows that the task is running. Hence I ignore it and continue to read the last 2nd line. In general the next two line which has "ended" and "started" in pairs can be marked as a record.
My environment is CentOS 6.5 (installed via VMware).
Below is the source code and it uses libccgi:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "json/json.h"
#include "ccgi.h"
#include <errno.h>
const char *queryName = "account";
const char *queryPage = "pageIndex";
const char *startAction = "/activeJob/start/";
const char *finishAction = "/activeJob/finish/";
const char *contentDes[] = {"there is backup processing, start at :","there is no backup"};
const float pageNums = 8.0;
/*
 * Adds the fields "statueCode", "content" and "totalPages" to jsonObj and
 * returns the string form of the object as produced by
 * json_object_get_string().
 */
const char * jsonStringCreate(json_object *jsonObj,int statueCode, char *content, int totalPages)
{
    json_object *field;

    field = json_object_new_int(statueCode);
    json_object_object_add(jsonObj, "statueCode", field);

    field = json_object_new_string(content);
    json_object_object_add(jsonObj, "content", field);

    field = json_object_new_int(totalPages);
    json_object_object_add(jsonObj, "totalPages", field);

    //the memory of returned string is under control of jsonObj
    return json_object_get_string(jsonObj);
}
/*
 * Return a heap-allocated copy of string.
 *
 * The caller owns the result and must free() it. Returns NULL when string is
 * NULL or when the allocation fails.
 */
char *mallocString(char *string)
{
    if (string == NULL)
    {
        return NULL;
    }
    size_t size = strlen(string) + 1; /* include the terminator */
    char *returnString = malloc(size);
    if (returnString != NULL)
    {
        memcpy(returnString, string, size);
    }
    //owner free the returned string
    return returnString;
}
/* File must be open with 'b' in the mode parameter to fopen() */
/* Set file position to size of file before reading last line of file */
/*
 * Reads the line that ends at the current file position, walking backwards one
 * character at a time, and stores it NUL-terminated in buf (capacity n).
 *
 * Returns buf on success. Returns NULL when n < 1, when ftell fails, when the
 * position is already 0, or when a seek or read fails.
 */
char* fgetsr(char* buf, int n, FILE* binaryStream)
{
long fpos;
int cpos;
int first = 1;
if (n < 1 || (fpos = ftell(binaryStream)) == -1 || fpos == 0)
return NULL;
// The buffer is filled from its end towards its start: cpos is the index of
// the first character stored so far, and the terminator sits in the last byte.
cpos = n - 1;
buf[cpos] = '\0';
for (;;)
{
int c;
// Step back one byte and read it; fgetc leaves the position just after it.
if (fseek(binaryStream, --fpos, SEEK_SET) != 0 ||
(c = fgetc(binaryStream)) == EOF)
return NULL;
// A '\n' met as the very first character is kept as part of this line;
// any later '\n' belongs to the previous line and ends the scan.
if (c == '\n' && first == 0) /* accept at most one '\n' */
break;
first = 0;
if (c != '\r') /* ignore DOS/Windows '\r' */
{
unsigned char ch = c;
if (cpos == 0)
{
// Buffer full: shift the stored text right by one, dropping the character
// just before the terminator, so the start of the line is what survives.
// NOTE(review): with n == 1 the length n - 2 is negative - confirm callers
// always pass n >= 2.
memmove(buf + 1, buf, n - 2);
++cpos;
}
memcpy(buf + --cpos, &ch, 1);
}
if (fpos == 0)
{
// Start of file reached: rewind so the next call sees position 0 and returns NULL.
fseek(binaryStream, 0, SEEK_SET);
break;
}
}
// Move the collected text (n - cpos bytes, terminator included) to the front of buf.
memmove(buf, buf + cpos, n - cpos);
return buf;
}
</code></pre>
<pre><code>
/*
 * CGI entry point: reads the "account" and "pageIndex" query parameters,
 * counts the finish records in the log file, then walks the log backwards
 * with fgetsr() to pair each finish record with its start record.
 */
int main(int argc, char const *argv[], char **env)
{
int statueCode = 0;
int totalPages = 0;
char *content = NULL;
json_object *jsonObj = json_object_new_object();
printf("Content-type: text/plain; encoding=utf-8\n\n");
CGI_varlist *vl;
const char *name;
CGI_value *value;
int i;
if ((vl = CGI_get_all("/tmp/cgi-upload-XXXXXX") ) == 0)
{
// CGI error
// fputs("CGI_get_all() failed\r\n", stdout);
statueCode = 501;
content = mallocString("CGI error");
}
else
{
//get the CGI env parameters, next to get the query parameter
char *accountName = NULL;
int queryIndex = -1;
for (name = CGI_first_name(vl); name != 0; name = CGI_next_name(vl))
{
value = CGI_lookup_all(vl, 0);
for ( i = 0; value[i] != 0; ++i)
{
if (strcmp(name, queryName) == 0)
{
// room for the value, the ".log" suffix and the terminator
accountName = malloc(sizeof(char) * (strlen(value[i]) + 4 + 1));
strcpy(accountName, value[i]);
strcat(accountName, ".log");
}
else if (strcmp(name, queryPage) == 0)
{
queryIndex = atoi(value[i]);
}
}
}
if (accountName == NULL || queryIndex < 0)
{
statueCode = 502;
content = mallocString("wrong query parameters format");
}
else
{
//for test, need remove
FILE *logFile = fopen("./test#mail.com.log", "rb");
// FILE *logFile = fopen(accountName, "r");
char *lastLineStr = NULL;
int lineNum = 0;
if (logFile != NULL)
{
//log file is found
char *line = NULL;
size_t len = 0;
ssize_t read;
// First pass: count the finish records. It also leaves the file position at
// the end of the file, which is where fgetsr() starts reading backwards.
while( (read = getline(&line, &len, logFile)) != -1)
{
// printf("%s\n", line);
if (strstr(line, finishAction) != 0)
{
/* code */
totalPages ++;
}
lineNum ++;
}
free(line);
int realPage = ceil(totalPages/pageNums);
if (queryIndex > realPage)
{
/* code */
statueCode = 503;
content = mallocString("wrong parameter: query index is beyond the total page");
}
else
{
//log file exist and query index is valid
long startIndex = 0, endIndex = 0, currentIndex = 0;;
startIndex = (queryIndex - 1) * pageNums;
endIndex = (queryIndex) *pageNums;
currentIndex = startIndex;
char buf[256];
int isFinishFound = -1;
int isStartFound = -1;
// NOTE(review): both arrays are declared with an empty initializer and so have
// no elements, yet they are indexed with currentIndex below.
char *finishContetn[] = {};
char *startContent[] = {};
// this is the core part
while(fgetsr(buf, sizeof(buf), logFile) != NULL && currentIndex < endIndex)
{
if (strstr(buf, finishAction) != 0)
{
/* code */
if (isFinishFound > 0)
{
/* code */
continue;
}
else
{
isFinishFound = 1;
isStartFound = -1;
finishContetn[currentIndex] = mallocString(buf);
}
}// strange part:
else if (strstr(buf, startAction) != 0)
{
//finish is not found, means: a start with no finish pairs
if (isFinishFound < 0)
{
/* code */
continue;
}
else
{
if (isStartFound < 0)
{
/* code */
startContent[currentIndex] = mallocString(buf);
isStartFound = 1;
isFinishFound = -1;
currentIndex ++;
}
else
{
continue;
}
}
}
}
}
}
else
{
//log file is not found
statueCode = 400;
content = mallocString("not found the account log");
// printf("not found\n");
// fprintf(stderr, "%d: %s\n", errno, strerror(errno) );
}
if (logFile)
{
fclose(logFile);
}
}
}
return 0;
}
The libjson and libccgi is placed in the right place and I build and make it like:
/usr/local/bin/clang -I /usr/include -DHAVE_SSL -DCLDMAN -DCLDMAN_USE_RETRY -DUSE_PROXY -c -MMD -fPIC -g -DHAVE_SSL -DCLDMAN -I../../build/include -I../../build/include/curl -I../../build/include/json -I../../build/include/svmdisk -o getLog.o getLog.c
/usr/local/bin/clang -o getLog getLog.o -L../../build/lib -lm -lccgi -ljson
and it has no error in the terminal.
The problem I met is that the int variable isStartFound ends up with a strange value of 134538336. It happens when I debug as follows:
in the while, the currentIndex=1 which means it begins to find the second record
it finds the "finish", and it begins to do:
isFinishFound = 1;
isStartFound = -1;
finishContetn[currentIndex] = mallocString(buf);
After that, it runs to the while again, and now the isStartFound is changed to 134538336.
I also try to add isStartFound to the watch variable. And it also shows in the "strange part"(which I add in the code) the value of isStartFound changes from -1 to 134538336.
I can't find where this value comes from. I doubt that the way I build and link is wrong. But I failed to find it.
Could any one suggest the way how can I look into?
Thanks!
=======edited:
The problem mainly locates the code below:
char buf[256];
int isFinishFound = -1;
int isStartFound = -1;
// Walk the log backwards until the page is full or the start of the file is reached.
while(fgetsr(buf, sizeof(buf), logFile) != NULL && currentIndex < endIndex)
{
if (strstr(buf, finishAction) != 0)
{
if (isFinishFound > 0)
{
continue;
}
else
{
isFinishFound = 1;
isStartFound = -1;
finishContetn[currentIndex] = mallocString(buf);
}
}// here strange happens: the isStartFound changes!
else
{
// other part
}
}
fgetsr is used to read one line of the text; isStartFound&isFinishFound are 2 mask to show whether the "start"/"finish" record is found.
The problem comes with a precondition:the first record is found and now we are try to read the last 5th line(which is the 2nd line). The text file is :
2013/12/31 19:53:54, started, /activeJob/start/ Failed
2013/12/31 19:55:55, ended, retCode = 6, Couldn't resolve host name, /activeJob/finish/ Failed
2014/01/01 08:06:55, started, /activeJob/start/ Failed
2014/03/04 12:16:55, started, /activeJob/start/ Success
2014/03/04 12:17:25, ended, retCode = 0, No error, /activeJob/finish/ success
2014/03/04 13:57:21, started, /activeJob/start/ Success
Now it begins to read the 2nd line and finds "finish", and hence it need to mark the var: isStartFound = -1.
When the program runs to the first "}", isStartFound is -1. But when it runs to the second "}" (which is the "}" of if (strstr(buf, finishAction) != 0)), the value changes: isStartFound = 134538336! (I added a comment in the code.) As you can see, nothing is done here!
This is my question and where I feel it weird. (Sorry for the too long code. If this edition still troubles you, please tell me.)
The problem is this declaration:
char *finishContetn[] = {};
This declares finishContetn as an empty array of pointers. Being empty, no matter what index you use to access this array, it will be out of bounds.
As you assign to this array:
finishContetn[currentIndex] = mallocString(buf);
you will write beyond the bounds, and will have undefined behavior. In this case, you will overwrite the stack where other variables are located, like for example the isStartFound variable.
A way to solve this is to either set a fixed size, or to use a dynamic "array". The dynamic array solution requires you to declare the variable as a pointer to pointer (to char) and use realloc to (re)allocate the array.
Something like
char **finishContent = NULL;
size_t finishContentSize = 0; /* Current size of the array */
...
/*
 * Grow the array by exactly one element. The parentheses are required:
 * sizeof(x) * n + 1 would add a single byte instead of a whole element,
 * and the store below would then write past the end of the allocation.
 */
char **temp = realloc(finishContent, sizeof(finishContent[0]) * (finishContentSize + 1));
if (temp != NULL)
{
    /* Only replace the original pointer once realloc has succeeded. */
    finishContent = temp;
    finishContent[finishContentSize++] = malloc(...);
}
Note that I use a temporary variable for the return value of realloc. This is because if realloc fails, it won't free finishContent for you, and if you assign directly to finishContent you will lose your original pointer and can't free it later.
Also note that I use sizeof(finishContent[0]). This will work even when finishContent is NULL because sizeof is a pure compile-time operator, it will not create any run-time code.
You might of course need to modify the code to fit your application, but the above should be enough to give you an idea.
I am pulling data from a bzip2 stream within a C application. As chunks of data come out of the decompressor, they can be written to stdout:
fwrite(buffer, 1, length, stdout);
This works great. I get all the data when it is sent to stdout.
Instead of writing to stdout, I would like to process the output from this statement internally in one-line-chunks: a string that is terminated with a newline character \n.
Do I write the output of the decompressor stream to another buffer, one character at a time, until I hit a newline, and then call the per-line processing function? Is this slow and is there a smarter approach? Thanks for your advice.
EDIT
Thanks for your suggestions. I ended up creating a pair of buffers that store the remainder (the "stub" at the end of an output buffer) at the beginning of a short line buffer, each time I pass through the output buffer's worth of data.
I loop through the output buffer character by character and process a newline-line's worth of data at a time. The newline-less remainder gets allocated and assigned, and copied to the next stream's line buffer. It seems like realloc is less expensive than repeated malloc-free statements.
Here's the code I came up with:
/*
 * Decompress a bzip2 stream chunk by chunk and hand it on one
 * newline-terminated line at a time.
 *
 * bzBuf          - raw output of one BZ2_bzRead call
 * bzLineBuf      - the line currently being assembled (NUL-terminated
 *                  before it is processed)
 * bzBufRemainder - copy of the unfinished line left at the end of a chunk;
 *                  its length is tracked in bzBufRemainderLen, so the copy
 *                  needs no NUL terminator and is never measured with strlen
 */
char bzBuf[BZBUFMAXLEN];
BZFILE *bzFp;
int bzError, bzNBuf;
char bzLineBuf[BZLINEBUFMAXLEN];
char *bzBufRemainder = NULL;
char *bzBufRemainderTmp;
size_t bzBufRemainderLen = 0;
int bzBufPosition, bzLineBufPosition;

bzFp = BZ2_bzReadOpen(&bzError, *fp, 0, 0, NULL, 0); /* http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html#bzcompress-init */
if (bzError != BZ_OK) {
    BZ2_bzReadClose(&bzError, bzFp);
    fprintf(stderr, "\n\t[gchr2] - Error: Bzip2 data could not be retrieved\n\n");
    return -1;
}

bzLineBufPosition = 0;
while (bzError == BZ_OK) {
    bzNBuf = BZ2_bzRead(&bzError, bzFp, bzBuf, sizeof(bzBuf));
    if (bzError == BZ_OK || bzError == BZ_STREAM_END) {
        if (bzBufRemainderLen > 0) {
            /* Put the unfinished line from the previous chunk back at the
             * start of the line buffer, then mark it consumed so it is not
             * prepended a second time. */
            memcpy(bzLineBuf, bzBufRemainder, bzBufRemainderLen);
            bzLineBufPosition = (int) bzBufRemainderLen;
            bzBufRemainderLen = 0;
        }
        for (bzBufPosition = 0; bzBufPosition < bzNBuf; bzBufPosition++) {
            /* Always keep one byte free for the terminating NUL. */
            if (bzLineBufPosition >= (int) sizeof(bzLineBuf) - 1) {
                free(bzBufRemainder);
                BZ2_bzReadClose(&bzError, bzFp);
                fprintf(stderr, "\n\t[gchr2] - Error: Bzip2 line is too long for the line buffer\n\n");
                return -1;
            }
            bzLineBuf[bzLineBufPosition++] = bzBuf[bzBufPosition];
            if (bzBuf[bzBufPosition] == '\n') {
                bzLineBuf[bzLineBufPosition] = '\0'; /* terminate bzLineBuf */
                /* process the line buffer, e.g. print it out or transform it, etc. */
                fprintf(stdout, "%s", bzLineBuf);
                bzLineBufPosition = 0; /* reset line buffer position */
            }
            else if (bzBufPosition == (bzNBuf - 1)) {
                /* The chunk ended in the middle of a line: save the partial
                 * line. realloc(NULL, n) behaves like malloc(n), and the
                 * temporary keeps the old block reachable on failure. */
                bzBufRemainderTmp = realloc(bzBufRemainder, (size_t) bzLineBufPosition);
                if (bzBufRemainderTmp == NULL) {
                    free(bzBufRemainder);
                    BZ2_bzReadClose(&bzError, bzFp);
                    fprintf(stderr, "\n\t[gchr2] - Error: Out of memory while buffering Bzip2 data\n\n");
                    return -1;
                }
                bzBufRemainder = bzBufRemainderTmp;
                memcpy(bzBufRemainder, bzLineBuf, (size_t) bzLineBufPosition);
                bzBufRemainderLen = (size_t) bzLineBufPosition;
            }
        }
    }
}

if (bzError != BZ_STREAM_END) {
    free(bzBufRemainder);
    BZ2_bzReadClose(&bzError, bzFp);
    fprintf(stderr, "\n\t[gchr2] - Error: Bzip2 data could not be uncompressed\n\n");
    return -1;
} else {
    if (bzLineBufPosition > 0) {
        /* The stream ended without a trailing newline: process the last
         * line instead of silently dropping it. */
        bzLineBuf[bzLineBufPosition] = '\0';
        fprintf(stdout, "%s", bzLineBuf);
        bzLineBufPosition = 0;
    }
    BZ2_bzReadGetUnused(&bzError, bzFp, 0, 0);
    BZ2_bzReadClose(&bzError, bzFp);
}

free(bzBufRemainder);
bzBufRemainder = NULL;
I really appreciate everyone's help. This is working nicely.
I don't think there's a smarter approach (except finding an automata library that already does this for you). Be careful with allocating proper size for the "last line" buffer: if it cannot handle arbitrary length and the input comes from something accessible to third parties, it becomes a security risk.
I've also been working with processing bzip2 data per line, and I found that reading one byte at a time was too slow. This worked better for me:
#include <stdio.h>
#include <stdlib.h>
#include <bzlib.h>
/* gcc -o bz bz.c -lbz2 */
#define CHUNK 128
/* Reader state for one bzip2-compressed file that is consumed line by line. */
struct bzdata {
    FILE *fp;          /* underlying file, opened by bz2_open() */
    BZFILE *bzf;       /* bzip2 read handle returned by BZ2_bzReadOpen() */
    int bzeof;         /* set to 1 once BZ2_bzRead() reports BZ_STREAM_END */
    int bzlen;         /* number of bytes the last BZ2_bzRead() put in bzbuf */
    int bzpos;         /* index of the next unconsumed byte in bzbuf */
    char bzbuf[4096];  /* decompressed data waiting to be split into lines */
};
/* Forward declarations for the file-local bzip2 line reader. */

/* Open `file` for bzip2 reading; returns 1 on success, 0 on failure. */
static int bz2_open(struct bzdata *bz, char *file);
/* Close the bzip2 handle and the file (if open) and reset the read state. */
static void bz2_close(struct bzdata *bz);
/* Read the next line into *line (capacity tracked in *li); returns a value
 * > 0 when a line was read, 0 when no more data is available, and -1 when
 * BZ2_bzRead reports an error. */
static int bz2_read_line(struct bzdata *bz, char **line, int *li);
/* Copy buffered bytes from bz->bzbuf into *line, stopping after a newline;
 * *ll is the current line length. Returns 1 once a newline was copied. */
static int bz2_buf(struct bzdata *bz, char **line, int *li, int *ll);
/* Enlarge *line by CHUNK bytes (plus room for the terminating NUL).
 * Returns 0 on success, -1 if the allocation fails; on failure *line and
 * *li are left untouched, so the old buffer stays valid. */
static int
bz2_line_grow(char **line, int *li)
{
    char *grown = realloc(*line, (size_t)(*li + CHUNK + 1) * sizeof(*grown));

    if (grown == NULL) {
        return -1;
    }
    *line = grown;
    *li += CHUNK;
    return 0;
}

/*
 * Append bytes from bz->bzbuf, starting at bz->bzpos, to *line until a
 * newline has been copied or the buffer is exhausted.
 *
 * bz   - reader state; bzpos is advanced past every consumed byte, and
 *        bzpos/bzlen are both reset to 0 once the buffer is fully consumed
 * line - heap buffer holding the line; grown (or first allocated) as needed
 * li   - capacity bookkeeping for *line (the allocation is *li + 1 bytes)
 * ll   - current length of the line; updated as bytes are appended
 *
 * The line is always left NUL-terminated (the newline itself is kept).
 * Returns 1 if a newline was copied, 0 if the buffer ran out first, and
 * -1 if memory could not be allocated.
 */
static int
bz2_buf(struct bzdata *bz, char **line, int *li, int *ll)
{
    int done = 0;

    for (; bz->bzpos < bz->bzlen && done == 0; bz->bzpos++) {
        /* Need room for this byte and the terminator that follows it. */
        if (*line == NULL || *ll + 1 >= *li) {
            if (bz2_line_grow(line, li) != 0) {
                if (*line != NULL) {
                    (*line)[*ll] = '\0';
                }
                return -1;
            }
        }
        if (((*line)[(*ll)++] = bz->bzbuf[bz->bzpos]) == '\n') {
            done = 1;
        }
    }
    if (bz->bzpos == bz->bzlen) {
        bz->bzpos = bz->bzlen = 0;
    }
    /* When the buffer was empty (e.g. an empty stream) the loop never ran
     * and *line may still be NULL; allocate before writing the terminator. */
    if (*line == NULL && bz2_line_grow(line, li) != 0) {
        return -1;
    }
    (*line)[*ll] = '\0';
    return done;
}
/*
 * Read the next line of the decompressed stream into *line.
 *
 * Bytes still buffered from an earlier read are consumed first; after that
 * the stream is read in blocks of sizeof(bz->bzbuf) until bz2_buf() reports
 * a complete line, the end of the stream is reached, or a read fails.
 *
 * Returns the non-zero result of bz2_buf(), 1 for a final line that has no
 * trailing newline, 0 when nothing is left, and -1 when BZ2_bzRead reports
 * anything other than BZ_OK or BZ_STREAM_END.
 */
static int
bz2_read_line(struct bzdata *bz, char **line, int *li)
{
    int status = BZ_OK;
    int used = 0;
    int result = 0;

    /* Leftover data from the previous call. */
    if (bz->bzpos != 0) {
        result = bz2_buf(bz, line, li, &used);
    }

    while (result == 0 && !bz->bzeof) {
        bz->bzlen = BZ2_bzRead(&status, bz->bzf, bz->bzbuf, sizeof(bz->bzbuf));
        if (status != BZ_OK && status != BZ_STREAM_END) {
            result = -1;
            break;
        }
        bz->bzpos = 0;
        if (status == BZ_STREAM_END) {
            bz->bzeof = 1;
        }
        result = bz2_buf(bz, line, li, &used);
    }

    /* Handle a last line that does not end in a line feed. */
    if (result == 0 && used > 0 && bz->bzeof) {
        result = 1;
    }
    return result;
}
/*
 * Open `file` in binary mode and attach a bzip2 read handle to it.
 *
 * Returns 1 when both fopen() and BZ2_bzReadOpen() succeed, 0 otherwise.
 * On failure whatever was already stored in bz->fp / bz->bzf is left in
 * place; bz2_close() releases it.
 */
static int
bz2_open(struct bzdata *bz, char *file)
{
    int bzerr = BZ_OK;

    bz->fp = fopen(file, "rb");
    if (bz->fp == NULL) {
        return 0;
    }

    bz->bzf = BZ2_bzReadOpen(&bzerr, bz->fp, 0, 0, NULL, 0);
    if (bz->bzf == NULL) {
        return 0;
    }

    return (bzerr == BZ_OK) ? 1 : 0;
}
/*
 * Release the bzip2 handle and the underlying file, if they are open, and
 * reset the buffer bookkeeping so the struct can be reused for another file.
 */
static void
bz2_close(struct bzdata *bz)
{
    int bzerr;

    if (bz->bzf != NULL) {
        BZ2_bzReadClose(&bzerr, bz->bzf);
        bz->bzf = NULL;
    }

    if (bz->fp != NULL) {
        fclose(bz->fp);
        bz->fp = NULL;
    }

    bz->bzeof = 0;
    bz->bzlen = 0;
    bz->bzpos = 0;
}
/*
 * Count the lines of every bzip2-compressed file named on the command line
 * and print "<file>: lines=<n>" for each one that could be opened.
 *
 * Returns EXIT_SUCCESS when every file was read completely, EXIT_FAILURE on
 * a usage error, an allocation failure, or when any file could not be
 * opened or decompressed.
 */
int main(int argc, char *argv[]) {
    struct bzdata *bz = NULL;
    int i, lc, rc, li = 0;
    int status = EXIT_SUCCESS;
    char *line = NULL;

    if (argc < 2) {
        /* Do not use fprintf's character count as the exit status. */
        fprintf(stderr, "usage: %s file [file ...]\n", argv[0]);
        return EXIT_FAILURE;
    }

    bz = calloc(1, sizeof(*bz));
    if (bz == NULL) {
        fprintf(stderr, "%s: out of memory\n", argv[0]);
        return EXIT_FAILURE;
    }

    for (i = 1; i < argc; i++) {
        if (bz2_open(bz, argv[i])) {
            lc = 0;
            while ((rc = bz2_read_line(bz, &line, &li)) > 0) {
                /* Process line here */
                lc++;
            }
            if (rc < 0) {
                fprintf(stderr, "%s: read error\n", argv[i]);
                status = EXIT_FAILURE;
            }
            printf("%s: lines=%d\n", argv[i], lc);
        } else {
            fprintf(stderr, "%s: cannot open\n", argv[i]);
            status = EXIT_FAILURE;
        }
        /* Also releases a file that was opened before bz2_open() failed. */
        bz2_close(bz);
    }

    free(bz);
    free(line); /* free(NULL) is a no-op */
    return status;
}
This would be easy to do using C++'s std::string, but in C it takes some code if you want to do it efficiently (unless you use a dynamic string library).
/*
 * Read one line from a bzip2 stream, one byte at a time.
 *
 * Returns a heap-allocated, NUL-terminated string with the trailing newline
 * stripped; the caller must free() it. Returns NULL when BZ2_bzRead reports
 * an error, or when the stream ends before any byte of a new line was read.
 * A final line that lacks a trailing newline is still returned.
 */
char *bz_read_line(BZFILE *input)
{
    size_t offset = 0;
    size_t len = CHUNK; /* arbitrary */
    char *output = xmalloc(len);
    int bzerror = BZ_OK;
    int got;
    int sawNewline = 0;

    for (;;) {
        got = BZ2_bzRead(&bzerror, input, output + offset, 1);
        if (bzerror != BZ_OK && bzerror != BZ_STREAM_END) {
            free(output);
            return NULL;
        }
        if (got == 1) {
            if (output[offset] == '\n') {
                sawNewline = 1;
                break;
            }
            offset++;
            /* Keep room for the next byte and for the terminator. */
            if (offset == len) {
                len += CHUNK;
                output = xrealloc(output, len);
            }
        }
        /* Stop at the end of the stream rather than reading past it, which
         * would turn a successful last line into an error. */
        if (bzerror == BZ_STREAM_END || got != 1) {
            break;
        }
    }

    if (!sawNewline && offset == 0) {
        /* End of stream with nothing read: there is no line to return. */
        free(output);
        return NULL;
    }

    /* Overwrites the newline if one was read, otherwise terminates the
     * partial last line. */
    output[offset] = '\0';
    return output;
}
(Where xmalloc and xrealloc handle errors internally. Don't forget to free the returned string.)
This is almost an order of magnitude slower than bzcat:
lars#zygmunt:/tmp$ wc foo
1193 5841 42868 foo
lars#zygmunt:/tmp$ bzip2 foo
lars#zygmunt:/tmp$ time bzcat foo.bz2 > /dev/null
real 0m0.010s
user 0m0.008s
sys 0m0.000s
lars#zygmunt:/tmp$ time ./a.out < foo.bz2 > /dev/null
real 0m0.093s
user 0m0.044s
sys 0m0.020s
Decide for yourself whether that's acceptable.
I think you should copy chunks of characters to another buffer until the latest chunk you write contains a new line character. Then you can work on the whole line.
You can save the rest of the buffer (after the '\n') into a temporary and then create a new line from it.