int value changed unexpected - c

I meet a problem of a c program: the int variable is changed unexpected.
Below is all about the problem:
I try to read a txt file which looks like:
2013/12/31 19:53:54, started, /activeJob/start/ Failed
2013/12/31 19:55:55, ended, retCode = 6, Couldn't resolve host name, /activeJob/finish/ Failed
2014/01/01 08:06:55, started, /activeJob/start/ Failed
2014/03/04 12:16:55, started, /activeJob/start/ Success
2014/03/04 12:17:25, ended, retCode = 0, No error, /activeJob/finish/ success
2014/03/04 13:57:21, started, /activeJob/start/ Success
It is a log file which will record the start/finish time of a task. I want to parse the log file and find the finished task record in a order time(latest first). For example, I will try to read the last line and it shows that the task is running. Hence I ignore it and continue to read the last 2nd line. In general the next two line which has "ended" and "started" in pairs can be marked as a record.
My environment is: Centos6.5 (installed via VMWaire).
Below is the source code and it uses libccgi:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "json/json.h"
#include "ccgi.h"
#include <errno.h>
const char *queryName = "account";
const char *queryPage = "pageIndex";
const char *startAction = "/activeJob/start/";
const char *finishAction = "/activeJob/finish/";
const char *contentDes[] = {"there is backup processing, start at :","there is no backup"};
const float pageNums = 8.0;
const char * jsonStringCreate(json_object *jsonObj,int statueCode, char *content, int totalPages)
{
json_object_object_add(jsonObj, "statueCode", json_object_new_int(statueCode));
json_object_object_add(jsonObj, "content", json_object_new_string(content));
json_object_object_add(jsonObj, "totalPages", json_object_new_int((int)totalPages));
//the memory of returned string is under control of jsonObj
return json_object_get_string(jsonObj);
}
char *mallocString(char *string)
{
char *returnString = malloc(sizeof(char) * (1 + strlen(string)));
strcpy(returnString, string);
//owner free the returned string
return returnString;
}
/* File must be open with 'b' in the mode parameter to fopen() */
/* Set file position to size of file before reading last line of file */
char* fgetsr(char* buf, int n, FILE* binaryStream)
{
long fpos;
int cpos;
int first = 1;
if (n < 1 || (fpos = ftell(binaryStream)) == -1 || fpos == 0)
return NULL;
cpos = n - 1;
buf[cpos] = '\0';
for (;;)
{
int c;
if (fseek(binaryStream, --fpos, SEEK_SET) != 0 ||
(c = fgetc(binaryStream)) == EOF)
return NULL;
if (c == '\n' && first == 0) /* accept at most one '\n' */
break;
first = 0;
if (c != '\r') /* ignore DOS/Windows '\r' */
{
unsigned char ch = c;
if (cpos == 0)
{
memmove(buf + 1, buf, n - 2);
++cpos;
}
memcpy(buf + --cpos, &ch, 1);
}
if (fpos == 0)
{
fseek(binaryStream, 0, SEEK_SET);
break;
}
}
memmove(buf, buf + cpos, n - cpos);
return buf;
}
</code></pre>
<pre><code>
int main(int argc, char const *argv[], char **env)
{
int statueCode = 0;
int totalPages = 0;
char *content = NULL;
json_object *jsonObj = json_object_new_object();
printf("Content-type: text/plain; encoding=utf-8\n\n");
CGI_varlist *vl;
const char *name;
CGI_value *value;
int i;
if ((vl = CGI_get_all("/tmp/cgi-upload-XXXXXX") ) == 0)
{
// CGI error
// fputs("CGI_get_all() failed\r\n", stdout);
statueCode = 501;
content = mallocString("CGI error");
}
else
{
//get the CGI env parameters, next to get the query parameter
char *accountName = NULL;
int queryIndex = -1;
for (name = CGI_first_name(vl); name != 0; name = CGI_next_name(vl))
{
value = CGI_lookup_all(vl, 0);
for ( i = 0; value[i] != 0; ++i)
{
if (strcmp(name, queryName) == 0)
{
accountName = malloc(sizeof(char) * (strlen(value[i]) + 4 + 1));
strcpy(accountName, value[i]);
strcat(accountName, ".log");
}
else if (strcmp(name, queryPage) == 0)
{
queryIndex = atoi(value[i]);
}
}
}
if (accountName == NULL || queryIndex < 0)
{
statueCode = 502;
content = mallocString("wrong query parameters format");
}
else
{
//for test, need remove
FILE *logFile = fopen("./test#mail.com.log", "rb");
// FILE *logFile = fopen(accountName, "r");
char *lastLineStr = NULL;
int lineNum = 0;
if (logFile != NULL)
{
//log file is found
char *line = NULL;
size_t len = 0;
ssize_t read;
while( (read = getline(&line, &len, logFile)) != -1)
{
// printf("%s\n", line);
if (strstr(line, finishAction) != 0)
{
/* code */
totalPages ++;
}
lineNum ++;
}
free(line);
int realPage = ceil(totalPages/pageNums);
if (queryIndex > realPage)
{
/* code */
statueCode = 503;
content = mallocString("wrong parameter: query index is beyond the total page");
}
else
{
//log file exist and query index is valid
long startIndex = 0, endIndex = 0, currentIndex = 0;;
startIndex = (queryIndex - 1) * pageNums;
endIndex = (queryIndex) *pageNums;
currentIndex = startIndex;
char buf[256];
int isFinishFound = -1;
int isStartFound = -1;
char *finishContetn[] = {};
char *startContent[] = {};
// this is the core part
while(fgetsr(buf, sizeof(buf), logFile) != NULL && currentIndex lt; endIndex)
{
if (strstr(buf, finishAction) != 0)
{
/* code */
if (isFinishFound > 0)
{
/* code */
continue;
}
else
{
isFinishFound = 1;
isStartFound = -1;
finishContetn[currentIndex] = mallocString(buf);
}
}// strange part:
else if (strstr(buf, startAction) != 0)
{
//finish is not found, means: a start with no finish pairs
if (isFinishFound < 0)
{
/* code */
continue;
}
else
{
if (isStartFound < 0)
{
/* code */
startContent[currentIndex] = mallocString(buf);
isStartFound = 1;
isFinishFound = -1;
currentIndex ++;
}
else
{
continue;
}
}
}
}
}
}
else
{
//log file is not found
statueCode = 400;
content = mallocString("not found the account log");
// printf("not found\n");
// fprintf(stderr, "%d: %s\n", errno, strerror(errno) );
}
if (logFile)
{
fclose(logFile);
}
}
}
return 0;
}
The libjson and libccgi is placed in the right place and I build and make it like:
/usr/local/bin/clang -I /usr/include -DHAVE_SSL -DCLDMAN -DCLDMAN_USE_RETRY -DUSE_PROXY -c -MMD -fPIC -g -DHAVE_SSL -DCLDMAN -I../../build/include -I../../build/include/curl -I../../build/include/json -I../../build/include/svmdisk -o getLog.o getLog.c
/usr/local/bin/clang -o getLog getLog.o -L../../build/lib -lm -lccgi -ljson
and it has no error in the terminal.
The problem I met is the value of int isStartFound will has a strange value of 134538336. It happen when I debug as following:
in the while, the currentIndex=1 which means it begins to find the second record
it finds the "finish", and it begins to do:
isFinishFound = 1;
isStartFound = -1;
finishContetn[currentIndex] = mallocString(buf);
After that, it runs to the while again, and now the isStartFound is changed to 134538336.
I also try to add isStartFound to the watch variable. And it also shows in the "strange part"(which I add in the code) the value of isStartFound changes from -1 to 134538336.
I can't find where this value comes from. I doubt that the way I build and link is wrong. But I failed to find it.
Could any one suggest the way how can I look into?
Thanks!
=======edited:
The problem mainly locates the code below:
char buf[256];
int isFinishFound = -1;
int isStartFound = -1;
while(fgetsr(buf, sizeof(buf), logFile) != NULL && currentIndex 0)
{
continue;
}
else
{
isFinishFound = 1;
isStartFound = -1;
finishContetn[currentIndex] = mallocString(buf);
}
}// here strange happens: the isStartFound changes!
else
{
// other part
}
}
fgetsr is used to read one line of the text; isStartFound&isFinishFound are 2 mask to show whether the "start"/"finish" record is found.
The problem comes with a precondition:the first record is found and now we are try to read the last 5th line(which is the 2nd line). The text file is :
2013/12/31 19:53:54, started, /activeJob/start/ Failed
2013/12/31 19:55:55, ended, retCode = 6, Couldn't resolve host name, /activeJob/finish/ Failed
2014/01/01 08:06:55, started, /activeJob/start/ Failed
2014/03/04 12:16:55, started, /activeJob/start/ Success
2014/03/04 12:17:25, ended, retCode = 0, No error, /activeJob/finish/ success
2014/03/04 13:57:21, started, /activeJob/start/ Success
Now it begins to read the 2nd line and finds "finish", and hence it need to mark the var: isStartFound = -1.
When the program runs to the first "}", the isStartFound is -1. But when it runs to the second "}"(which is the "}" of if (strstr(buf, finishAction) != 0)), the value changes: siStartFound = 134538336!( I add comment in the code) As you can see, here nothing is done!
This is my question and where I feel it weird. (Sorry for the too long code. If this edition still troubles you, please tell me.)

The problem is this declaration:
char *finishContetn[] = {};
This declares finishContetn as an empty array of pointers. Being empty, no matter what index you use to access this array, it will be out of bounds.
As you assign to this array:
finishContetn[currentIndex] = mallocString(buf);
you will write beyond the bounds, and will have undefined behavior. In this case, you will overwrite the stack where other variables are located, like for example the isStartFound variable.
A way to solve this is to either set a fixed size, or to use a dynamic "array". The dynamic array solution requires you to declare the variable as a pointer to pointer (to char) and use realloc to (re)allocate the array.
Something like
char **finishContent = NULL;
size_t finishContentSize = 0; /* Current size of the array */
...
char **temp = realloc(finishContent, sizeof(finishContent[0]) * finishContentSize + 1);
if (temp != NULL)
{
finishContent = temp;
finishContent[finishContentSize++] = malloc(...);
}
Note that I use a temporary variable for the return of realloc, this is because if realloc fails then it won't free finishContent for you, and if you assign directly to finishContent you will loose your original pointer and can't free it later.
Also note that I use sizeof(finishContent[0]). This will work even when finishContent is NULL because sizeof is a pure compile-time operator, it will not create any run-time code.
You might of course need to modify the code to fit your application, but the above should be enough to give you an idea.

Related

Reading, a set line range from a file in C

I' am writing a C program which allows the user to dynamically specify the File name from which the data is to be read. Next the user enters a lower bound and an upper bound. The data in the lines from between the bounds is to be printed.
For this the main function makes a call: readValues(cTargetName, iLower, iHiger);
The function readValues is supposed to work as follows:
Check if file exist, if yes. Open it with fopen
Read with feof and fgets line by line the whole file, and store each line in char string
With a for loop, print the correct range of lines from the string
I'm not sure why but the while loop doesn't seem to exit although I use the feof statement, which should terminate after the end of the File is reached.
The code looks as follows:
#include <stdio.h>
#include <stdlib.h>
void readValues(char cFileName[75], int n, int m)
{
//Variable declaration;
char strArray[50][50];
char *parser;
int i = 0;
FILE *Data;
if(Data = fopen(cFileName, "rt") == NULL){
printf("File could not be opened");
return 1; //Can you return 1 in a void function?
}
//Read the file line by line
while(feof(Data)==0){
fgets(strArray[i], 200, Data);
i++;
}
//Reading the specified lines
for(n; n<=m; n++){
printf("%s", strArray[n]);
}
}
int main()
{
char cTargetName[75] = {"C:/Users/User1/Desktop/C_Projects_1/TestData.txt"};
int iLower = 2;
int iHiger = 4;
readValues(cTargetName, iLower, iHiger);
return 0;
}
All help is appreciated. Thanks in advance!
Here is my solution to your question:
#include <stdio.h>
#include <stdlib.h>
#define MIN_LINE_LENGTH 64
typedef enum {
false, true
} bool;
int main() {
char filename[PATH_MAX] = {0};
printf("Enter filename:\n");
fgets(filename, PATH_MAX, stdin); // get filename from stdin
char *ptr = filename;
while (*ptr) { // remove trailing newline at the end of filename (fgets() includes newline)
if (*ptr == '\n') {
*ptr = 0;
}
++ptr;
}
printf("Enter starting line and end line, separated by a space:\n");
size_t startLine = 0;
size_t endLine = 0;
bool hasFirstNum = false;
bool hasSecondNum = false;
bool hasMiddleSpace = false;
bool hasLastSpace = false;
size_t numCount = 0;
int ch;
while ((ch = fgetc(stdin)) != EOF && ch != '\n') { // continually receive chars from stdin
if (ch != 32 && !(ch >= 48 && ch <= 57)) { // if not a space or number, raise error
fprintf(stderr, "Only numerical values (and spaces) can be entered.\n");
return 1;
}
if (ch == 32) {
if (hasFirstNum) {
hasMiddleSpace = true;
}
if (hasSecondNum) {
hasLastSpace = true;
}
continue;
}
else if (!hasFirstNum) {
++numCount;
hasFirstNum = true;
}
else if (!hasSecondNum && hasMiddleSpace) {
++numCount;
hasSecondNum = true;
}
else if (hasLastSpace) {
++numCount;
}
if (numCount == 1) {
startLine *= 10;
startLine += ch - 48; // '0' character in ASCII is 48
}
else if (numCount == 2){
endLine *= 10;
endLine += ch - 48;
}
else {
break;
}
}
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
fprintf(stderr, "Error opening file.\n");
return 1;
}
char **lines = malloc(sizeof(char *));
char *line = malloc(MIN_LINE_LENGTH);
*lines = line;
int c;
size_t char_count = 0;
size_t line_count = 1;
while ((c = fgetc(fp)) != EOF) { // continually get chars from file stream
if (c == '\n') { // expand lines pointer if a newline is encountered
*(line + char_count) = 0;
++line_count;
lines = realloc(lines, line_count*sizeof(char *));
line = (*(lines + line_count - 1) = malloc(MIN_LINE_LENGTH));
char_count = 0;
continue;
}
if ((char_count + 1) % MIN_LINE_LENGTH == 0 && char_count != 0) { // expand line pointer if needed
line = realloc(line, char_count + MIN_LINE_LENGTH);
}
*(line + char_count) = c;
++char_count;
}
*(line + char_count) = 0; // to ensure the last line always ends with the null byte
if (startLine >= line_count) { // raise error if starting line specified is greater than num. of lines in doc.
fprintf(stderr, "Specified starting line is less than total lines in document.\n");
return 1;
}
if (endLine > line_count) { // adjust ending line if it is greater than number of lines in doc.
endLine = line_count;
}
if (startLine == 0) { // we will be using the starting index of 1 as the first line
startLine = 1;
}
char **linesPtr = lines + startLine - 1;
while (startLine++ <= endLine) { // print lines
printf("%s\n", *linesPtr++);
}
for (size_t i = 0; i < line_count; ++i) { // free all memory
free(*(lines + i));
}
free(lines);
return 0;
}
It is a little more convoluted, but because it uses dynamic memory allocation, it can handle lines of any length within a text file.
If there is anything unclear, please let me know and I would be happy to explain.
Hope this helps!!
several issues here,
first, you limited the length of lines to 200, not exactly what you might expect to get.
the fgets function returns lines up to specified length unless hit by newline character - this should be taken into account.
additionally, fgets returns NULL if you hit EOF - no real need to use feof.
second, you could save yourself a lot of pain and simply count the number of times you get a string, and for the times you are within the range just print it immediately. will save you a nice amount of overhead
like this:
#include <stdio.h>
#include <stdlib.h>
#define MAXLINE 200//or anything else you want
void readValues(char cFileName[75], int n, int m)
{
//Variable declaration;
char line[MAXLINE];
int i = 0;
FILE *Data;
if((Data = fopen(cFileName, "rt")) == NULL){
printf("File could not be opened");
return 1; //Can you return 1 in a void function?
}
//Read the file line by line and print within range of lines
while((line=fgets(line, MAXLINE,Data))!=NULL){//terminates upon EOF
if (++i>=n&&i<=m)
printf(""%s\n",line);
}
}

How can I solve my segmentation fault core dumped error?

I'm trying to figure out why I keep getting segmentation fault core dumped as an error when I try to run my program. It happens after I run this part of my code:
for(size_t i = 0; i< fLength;i++){
splitPath = strtok(path1, ":");//spltting the string by coln
while(splitPath!=NULL && (pathsFound == 0 || a_flag == 1)){
// if no flag,
// it'll only loop through once if path is found.
// It'll loop through until all paths have been checked and
// printed if there is a flag
copy = strcpy(copy,"");
copy = strcpy(copy,splitPath);
strcat(copy, "/");
strcat(copy, fileName[i]);
if(file_exists(copy)){
printf("\n%s\n",copy);
pathsFound == 1;
}
splitPath = strtok(NULL, ":");
}
if(pathsFound == 0){
return 1;
}
}//end of for
and so from the research, I've been doing it could have something to do with my file_exist function which returns either true or false depending on whether or not a file exists. Apparently most common cases have to do with trying to access a file when you don't have permission? Either way Here is my file_exists function
bool file_exists(char* path) {
//int accessValue = access(path, F_OK);
// access returns -1 if it cannot access file and 0 otherwise
int check = 0;
struct stat buffer;
int exist = stat(path,&buffer);
if(exist == 0)
check = 1;
else // -1
check = 0;
if(check == 1){
return true;
}else{
return false;
}
}
So this is a function I'm creating as a library to include into my main.c file. I just googled how to check if a file exists and tried a couple of different programs. All had the same result. So maybe it's not even my file_exists function.
Can you guys please help me decipher what's going on? Just as a heads up, I'm new to C and this is the first program that I'm making!
EDIT#1:
this is the first part of my code
int a_flag = 0; //counter
int return_value = 0;
const char *fileName[argc]; //size related to amount of arguments
int fLength = 0;
int fileFound;
char* copy;
char *path1 = getenv("PATH"); //file path
char *splitPath;//spltting the string by coln
int pathsFound = 0;//counting the amount of existing paths that have been found
for (size_t i = 1; i < argc; i++){
if (strcmp(argv[i], "-a") == 0){
a_flag = 1;
}else{
if (argv[i][0] == '-'){
printf("Invalid flag!\n");
return 2;
}else{
fileName[fLength]=argv[i];
fLength++;
}
// Either:
// 1. We have an unknown flag
// 2. We have a filename
}
}
printf("%d\n",fLength);
splitPath = strtok(path1, ":");
while(splitPath!=NULL){
printf("\n%s\n", splitPath);
splitPath = strtok(NULL, ":");
}

How to find words with capital letters in a char using c?

I'm trying to find all the words with capital letters in a string, but am unable to process my data structure. i seem to be able to print out fileContent, indicating that it is loading in successfully, but my second function is not working on the file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char* loadFile(char* fileName)
{
FILE *inputFile;
inputFile = fopen(fileName, "r");
//finds the end of the file
fseek(inputFile, 0, SEEK_END);
//stores the size of the file
int size = ftell(inputFile);
//Sets the scan to the start of the file
fseek(inputFile, 0, SEEK_SET);
char *documentStore = (char*)malloc(size);
int i = 0, c;
while((c = fgetc(inputFile)) != EOF)
{
documentStore[i] = c;
i++;
}
return documentStore;
}
void countImportantWords(char* fileContent, char** importantWords, int* frequencyWords)
{
int uniqueWordCount = 0;
int lengthWordStore = 10;
int i = 0;
int recording = 0;
char wordBuffer[50];
int wordBufferCount = 0;
int isWordPresent = 0;
while(fileContent[i] != EOF)
{
//To allocate more memory incase the structure is full
if(uniqueWordCount == lengthWordStore)
{
lengthWordStore += 10;
char** newWordStore = realloc(importantWords, lengthWordStore * sizeof(char*));
int* newFrequencyStore = realloc(frequencyWords, sizeof(int));
importantWords = newWordStore;
frequencyWords = newFrequencyStore;
}
printf("%s", wordBuffer);
//Conditions to fill if its a word
if(fileContent[i] >= 'A' && fileContent[i] <= 'Z' && recording == 0)
{
wordBuffer[0] = fileContent[i];
recording = 1;
}else if(fileContent[i] >= 'a' && fileContent[i] <= 'z' && recording == 1)
{
//each if is to check if the end of word is reached. Any character that is non alphabetical is considered end of word
wordBufferCount += 1;
wordBuffer[wordBufferCount] = fileContent[i];
} else if (fileContent[i] >= 'A' && fileContent[i] <= 'Z' && recording == 1)
{
wordBufferCount += 1;
wordBuffer[wordBufferCount] = fileContent[i];
} else {
//Adding a terminating character so that it strcpy only copies until that point
wordBuffer[wordBufferCount + 1] = '\0';
recording = 0;
//check to see if that word is in the array already, and if it is, it will just increment the frequency
for(int j = 0; j < uniqueWordCount; j++){
if(strcmp(wordBuffer, importantWords[j]) == 0)
{
frequencyWords[j] += 1;
isWordPresent = 1;
}
}
//if its not present, it should assign it to the structure
if(isWordPresent == 0)
{
char* wordStore = (char*)malloc(wordBufferCount * sizeof(char));
strcpy(wordStore, wordBuffer);
uniqueWordCount += 1;
importantWords[uniqueWordCount] = wordStore;
frequencyWords[uniqueWordCount] = 1;
}
}
i++;
}
}
int main() {
char fileName[50];
char *fileContent;
char **importantWords = (char**)malloc(10*sizeof(char**));
int *frequencyWords = (int*)malloc(10*sizeof(int));
printf("Please input the full file path: ");
scanf("%s", fileName);
fileContent = loadFile(fileName);
countImportantWords(fileContent, importantWords, frequencyWords);
int i = 0;
while(importantWords[i] != '\0')
{
printf("%s %d", importantWords[i], frequencyWords[i]);
i++;
}
return 0;
}
I've put in the full file so you can see how the structure was created incase that it is the issue, but ideally what would happen is that the final loop would print out all the words that are important and they're frequency. Currently i'm getting exit code 11, which i'm not sure what it means, but may be worth mentioning. I'd really appreciate any help :)
You can simplify the process dramatically but utilising functions and learning to manage your memory. I wrote a short example which does not take punctuation into account. It just assumes every word is separated by a space, which you can customise to your discretion.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
char* readfile(char* filename){
char* data = NULL;
FILE* file = fopen(filename, "r");
if(file == NULL){
return NULL;
}
fseek(file, 0, SEEK_END);
long size = ftell(file)+1;
fseek(file, 0, SEEK_SET);
data = (char*)malloc(size);
if(data == NULL){
return NULL;
}
fgets(data, (int)size, file);
return data;
}
typedef struct uppercase_t{
char** word;
int count;
}uppercase;
void copy(uppercase* u,char* token){
size_t length = strlen(token);
u->word[u->count] = (char*)malloc(length+1);
if(u->word[u->count] == NULL){
return;
}
strcpy(u->word[u->count], token);
++u->count;
}
void createuppercasedata(uppercase* u, char* data){
const char delimeter[] = " ";
char* token = strtok(data, delimeter);
if(token == NULL){
return;
}
u->word = (char**)malloc(u->count+1);
if(u->word == NULL){
return;
}
if(isupper(token[0])){
copy(u,token);
}
while(token != NULL){
token = strtok(0, delimeter);
if(token != NULL)
if(isupper(token[0])) {
char** reallocated = (char**)realloc(u->word, u->count+1);
if(reallocated == NULL){
return;
}
u->word = reallocated;
copy(u, token);
}
}
}
void destroyuppercasedata(uppercase* u){
for(int index = 0; index < u->count; ++index){
free(u->word[index]);
}
free(u->word);
}
int main(){
char filename[] = "textfile";
char* data = readfile(filename);
if(data == NULL){
return -1;
}
uppercase u = {0};
createuppercasedata(&u, data);
printf("found %i uppercase words\n",u.count);
for(int index = 0; index < u.count; ++index){
printf("%s\n", u.word[index]);
}
destroyuppercasedata(&u);
free(data);
}
The code will allocate a new pointer for each uppercase and memory for the word to be copied too. It will free all the memory it allocated in the structure with destroyuppercasedata and it will free the initial data that was read from file. Error checking and memory management in C is really important. So utilise those properly.
This was the test file I used.
textfile
How many Uppercase words can Be Found In this text File the answer should be Seven
And this was the output to the terminal:
How
Uppercase
Be
Found
In
File
Seven

fprintf failing, but fputc works just fine?

I am trying to write a function that reads a line of text over a socket (it's part of the code I am writing for an HTTP Server for homework).
It works just fine writing to a file when I am writing using fputc. However, when I try and copy the characters to a buffer, and then use fprintf to print the whole buffer to the file, I don't seem to be getting any output.
Here's the code:
int read_line(int fd, char *buffer, int size) {
char *broken_buffer = (char*) malloc(sizeof(char) * 8096);
char next = '\0';
char err;
int i = 0;
FILE *f = fopen("read_line2.txt", "w");
while (i < size - 1 && next != '\n') {
err = read(fd, &next, 1);
if (err > 0) {
if (next == '\r') {
err = recv(fd, &next, 1, MSG_PEEK);
if (err > 0 && next == '\n') {
read(fd, &next, 1);
} else {
next = '\n';
}
}
fputc(next, f); // Works
broken_buffer[i] = next;
buffer[i] = next;
i++;
} else {
next = '\n';
}
}
broken_buffer[i] = '\0';
buffer[i] = '\0';
FILE *out = fopen("read_line.txt", "w");
fprintf(out, "%s\n", broken_buffer); // Does not work
fclose(out);
fclose(f);
return i;
}
EDIT: I have tried using this alternative function:
int read_socket(int fd, char *buffer, int size) {
int bytes_recvd = 0;
int retries = 0;
int total_recvd = 0;
while (retries < MAX_RETRIES && size > 0 && strstr(buffer, ">") == NULL) {
bytes_recvd = read(fd, buffer, size);
if (bytes_recvd > 0) {
buffer += bytes_recvd;
size -= bytes_recvd;
total_recvd += bytes_recvd;
} else {
retries++;
}
}
if (bytes_recvd >= 0) {
// Last read was not an error, return how many bytes were recvd
return total_recvd;
}
// Last read was an error, return error code
return -1;
}
And I have no problems printing this one out with fprintf.
EDIT2: I have figured out that i is somehow 0 after the loop, so the first character is being overwritten with a '\0'. However, when I put in debugging code to print out the value of i within the loop, I found it being incremented up to 22 (23 being the final value at which the loop breaks). How is this even possible? The resulting string is:
GET /blah.txt HTTP/1.1
Is the value of next ever 0? If it is 0 then that value will go into broken_buffer, which means that fprintf will think it's at the end of a string before you explicitly put the null there yourself.
The problem turned out to be two processes that were both connecting to the server (thanks, Google Chrome...), and both were writing to the same file somehow. The code WAS working correctly.

How do I handle a stream of data internal to a C-based app?

I am pulling data from a bzip2 stream within a C application. As chunks of data come out of the decompressor, they can be written to stdout:
fwrite(buffer, 1, length, stdout);
This works great. I get all the data when it is sent to stdout.
Instead of writing to stdout, I would like to process the output from this statement internally in one-line-chunks: a string that is terminated with a newline character \n.
Do I write the output of the decompressor stream to another buffer, one character at a time, until I hit a newline, and then call the per-line processing function? Is this slow and is there a smarter approach? Thanks for your advice.
EDIT
Thanks for your suggestions. I ended up creating a pair of buffers that store the remainder (the "stub" at the end of an output buffer) at the beginning of a short line buffer, each time I pass through the output buffer's worth of data.
I loop through the output buffer character by character and process a newline-line's worth of data at a time. The newline-less remainder gets allocated and assigned, and copied to the next stream's line buffer. It seems like realloc is less expensive than repeated malloc-free statements.
Here's the code I came up with:
char bzBuf[BZBUFMAXLEN];
BZFILE *bzFp;
int bzError, bzNBuf;
char bzLineBuf[BZLINEBUFMAXLEN];
char *bzBufRemainder = NULL;
int bzBufPosition, bzLineBufPosition;
bzFp = BZ2_bzReadOpen(&bzError, *fp, 0, 0, NULL, 0); /* http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html#bzcompress-init */
if (bzError != BZ_OK) {
BZ2_bzReadClose(&bzError, bzFp);
fprintf(stderr, "\n\t[gchr2] - Error: Bzip2 data could not be retrieved\n\n");
return -1;
}
bzError = BZ_OK;
bzLineBufPosition = 0;
while (bzError == BZ_OK) {
bzNBuf = BZ2_bzRead(&bzError, bzFp, bzBuf, sizeof(bzBuf));
if (bzError == BZ_OK || bzError == BZ_STREAM_END) {
if (bzBufRemainder != NULL) {
/* fprintf(stderr, "copying bzBufRemainder to bzLineBuf...\n"); */
strncpy(bzLineBuf, bzBufRemainder, strlen(bzBufRemainder)); /* leave out \0 */
bzLineBufPosition = strlen(bzBufRemainder);
}
for (bzBufPosition = 0; bzBufPosition < bzNBuf; bzBufPosition++) {
bzLineBuf[bzLineBufPosition++] = bzBuf[bzBufPosition];
if (bzBuf[bzBufPosition] == '\n') {
bzLineBuf[bzLineBufPosition] = '\0'; /* terminate bzLineBuf */
/* process the line buffer, e.g. print it out or transform it, etc. */
fprintf(stdout, "%s", bzLineBuf);
bzLineBufPosition = 0; /* reset line buffer position */
}
else if (bzBufPosition == (bzNBuf - 1)) {
bzLineBuf[bzLineBufPosition] = '\0';
if (bzBufRemainder != NULL)
bzBufRemainder = (char *)realloc(bzBufRemainder, bzLineBufPosition);
else
bzBufRemainder = (char *)malloc(bzLineBufPosition);
strncpy(bzBufRemainder, bzLineBuf, bzLineBufPosition);
}
}
}
}
if (bzError != BZ_STREAM_END) {
BZ2_bzReadClose(&bzError, bzFp);
fprintf(stderr, "\n\t[gchr2] - Error: Bzip2 data could not be uncompressed\n\n");
return -1;
} else {
BZ2_bzReadGetUnused(&bzError, bzFp, 0, 0);
BZ2_bzReadClose(&bzError, bzFp);
}
free(bzBufRemainder);
bzBufRemainder = NULL;
I really appreciate everyone's help. This is working nicely.
I don't think there's a smarter approach (except finding an automata library that already does this for you). Be careful with allocating proper size for the "last line" buffer: if it cannot handle arbitrary length and the input comes from something accessible to third parties, it becomes a security risk.
I've also been working with processing bzip2 data per line, and I found that reading one byte at a time was too slow. This worked better for me:
#include <stdio.h>
#include <stdlib.h>
#include <bzlib.h>
/* gcc -o bz bz.c -lbz2 */
#define CHUNK 128
struct bzdata {
FILE *fp;
BZFILE *bzf;
int bzeof, bzlen, bzpos;
char bzbuf[4096];
};
static int bz2_open(struct bzdata *bz, char *file);
static void bz2_close(struct bzdata *bz);
static int bz2_read_line(struct bzdata *bz, char **line, int *li);
static int bz2_buf(struct bzdata *bz, char **line, int *li, int *ll);
static int
bz2_buf(struct bzdata *bz, char **line, int *li, int *ll)
{
int done = 0;
for (; bz->bzpos < bz->bzlen && done == 0; bz->bzpos++) {
if (*ll + 1 >= *li) {
*li += CHUNK;
*line = realloc(*line, (*li + 1) * sizeof(*(*line)));
}
if ( ((*line)[(*ll)++] = bz->bzbuf[bz->bzpos]) == '\n') {
done = 1;
}
}
if (bz->bzpos == bz->bzlen) {
bz->bzpos = bz->bzlen = 0;
}
(*line)[*ll] = '\0';
return done;
}
static int
bz2_read_line(struct bzdata *bz, char **line, int *li)
{
int bzerr = BZ_OK, done = 0, ll = 0;
if (bz->bzpos) {
done = bz2_buf(bz, line, li, &ll);
}
while (done == 0 && bz->bzeof == 0) {
bz->bzlen = BZ2_bzRead(&bzerr, bz->bzf, bz->bzbuf, sizeof(bz->bzbuf));
if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) {
bz->bzpos = 0;
if (bzerr == BZ_STREAM_END) {
bz->bzeof = 1;
}
done = bz2_buf(bz, line, li, &ll);
} else {
done = -1;
}
}
/* Handle last lines that don't have a line feed */
if (done == 0 && ll > 0 && bz->bzeof) {
done = 1;
}
return done;
}
static int
bz2_open(struct bzdata *bz, char *file)
{
int bzerr = BZ_OK;
if ( (bz->fp = fopen(file, "rb")) &&
(bz->bzf = BZ2_bzReadOpen(&bzerr, bz->fp, 0, 0, NULL, 0)) &&
bzerr == BZ_OK) {
return 1;
}
return 0;
}
static void
bz2_close(struct bzdata *bz)
{
int bzerr;
if (bz->bzf) {
BZ2_bzReadClose(&bzerr, bz->bzf);
bz->bzf = NULL;
}
if (bz->fp) {
fclose(bz->fp);
bz->fp = NULL;
}
bz->bzpos = bz->bzlen = bz->bzeof = 0;
}
int main(int argc, char *argv[]) {
struct bzdata *bz = NULL;
int i, lc, li = 0;
char *line = NULL;
if (argc < 2) {
return fprintf(stderr, "usage: %s file [file ...]\n", argv[0]);
}
if ( (bz = calloc(1, sizeof(*bz))) ) {
for (i = 1; i < argc; i++) {
if (bz2_open(bz, argv[i])) {
for (lc = 0; bz2_read_line(bz, &line, &li) > 0; lc++) {
/* Process line here */
}
printf("%s: lines=%d\n", argv[i], lc);
}
bz2_close(bz);
}
free(bz);
}
if (line) {
free(line);
}
return 0;
}
This would be easy to do using C++'s std::string, but in C it takes some code if you want to do it efficiently (unless you use a dynamic string library).
char *bz_read_line(BZFILE *input)
{
size_t offset = 0;
size_t len = CHUNK; // arbitrary
char *output = (char *)xmalloc(len);
int bzerror;
while (BZ2_bzRead(&bzerror, input, output + offset, 1) == 1) {
if (offset+1 == len) {
len += CHUNK;
output = xrealloc(output, len);
}
if (output[offset] == '\n')
break;
offset++;
}
if (output[offset] == '\n')
output[offset] = '\0'; // strip trailing newline
else if (bzerror != BZ_STREAM_END) {
free(output);
return NULL;
}
return output;
}
(Where xmalloc and xrealloc handle errors internally. Don't forget to free the returned string.)
This is almost an order of magnitude slower than bzcat:
lars#zygmunt:/tmp$ wc foo
1193 5841 42868 foo
lars#zygmunt:/tmp$ bzip2 foo
lars#zygmunt:/tmp$ time bzcat foo.bz2 > /dev/null
real 0m0.010s
user 0m0.008s
sys 0m0.000s
lars#zygmunt:/tmp$ time ./a.out < foo.bz2 > /dev/null
real 0m0.093s
user 0m0.044s
sys 0m0.020s
Decide for yourself whether that's acceptable.
I think you should copy chunks of characters to another buffer until the latest chunk you write contains a new line character. Then you can work on the whole line.
You can save the rest of the buffer (after the '\n') into a temporary and then create a new line from it.

Resources