RAMFS mounting program - c

I rewrote a bash script into a standalone C program for mounting the current directory into the ramfs. However I still use "system()" for some of the functions. The only thing that still bothers me is the use of C mount function. In my code I call it with a system call but I want to know what is the right use in my case for C`s system.
int main(int argc, char* argv[])
{
(void) argc; // unused
bool matched_result = false;
// extra size
char pwd[512]={0};
getcwd(pwd, sizeof(pwd)/sizeof(pwd[0]));
printf("Current working dir is: %s\n", pwd);
// Check is already mounted ramfs on the current directory.
// The beter way is manual parsing.
// in order to avoid that process we have to write dirwalker
// to list all mounted fses but for now will use this one
FILE* mntstatus = popen("mount", "r");
// assume no more than 512 chars per line
char buff[512]={0};
while (fgets(buff, 512, mntstatus) != NULL) {
char* match = strchr(buff, '/');
// trim 3 whitespaces from the end
trim_end(match, ' ', 3);
if (strcmp(match, pwd)==0) {
printf("Match: [%s]\n", match);
matched_result = true;
break; // no need to search more
}
}
// our cwd is not mounted
if (!matched_result) {
// empty, ok we can mount here
// just efective-uid is not enough for mount
// i need real-uid
setuid(0);
int res = mount_filesystem(pwd, "ramfs", "ramfs", 0, "rw", 0);
//int res = system("mount -t ramfs ramfs `pwd`");
if (res == 0) {
printf("Mount ok\n");
// convert to octals
int octal_perms = strtol(g_Permissions, 0, 8);
if (chmod(pwd, octal_perms) < 0) {
fprintf(stderr, "%s: error in chmod(%s, %s) - %d (%s)\n",
argv[0], // program
pwd, // current dir
g_Permissions, // with permissions
errno, // Ermac :)
strerror(errno));
exit(1);
}
} else {
printf("Mount failed!\n");
return 1;
}
} else {
printf("Dude you are ok!\n");
}
return 0;
}
void trim_end(char *str, const char delim, int count)
{
char* begin = str;
char* end = &str[strlen(str)-1];
int i=0;
while ((i < count) && ( begin != end)) {
if (*end == delim) {
i++;
}
*end-- = 0;
}
}
void register_exit_callback(cbAtExit ex)
{
atexit(ex);
}
int mount_filesystem(const char *src, const char *tgt, const char *fstype, unsigned long flags,
const char *mode, const char *uid)
{
char mode_uid[256]={0};
if((mode != NULL) && (uid != NULL)) {
sprintf(mode_uid, "mode=%s,uid=%s", mode, uid);
}
int result = mount(src, tgt, fstype, flags, mode_uid);
// handle result outside
return result;
}

Related

Split string into two variables in C

I have been givin an assignment which uses C to read a given file and input data into a binary tree. My current problem is splitting the line read from the file into two different variables.
The file that has been given contains two bits of data, an ID and some information. 2409, blah, blah, blah
Currently, the program is reading the file correctly and storing each line and then displaying it. I have tried to use token's, memmove and trying to simply select the characters manually however this needs to be dynamic. The ID is not a fixed amount of numbers so manually selecting it will not work. As mentioned, I have tried to use strtok using ", " as a delimited however it just doesn't change anything.
This is currently what I am using to display the information, I intent to split the string within the while loop for each line:
int main() {
struct node* root = NULL;
FILE *file;
char filename[15];
char buff[255];
char line[128];
strcpy(filename, "file.txt");
file = fopen(filename, "r");
if (file == NULL) {
printf("File could not be openned.\n");
exit(0);
}
while (line != NULL)
{
strcpy(line, fgets(buff, 255, file));
printf("%s", line);
}
fclose(file);
}
Is there any way that I am able to simply select the first characters up to the first occurance of "," and convert them into an integer. Then select the rest of the data removing the first "ID, " and insert that into a char variable.
Your help is greatly appreciated.
Like #LPs suggested, and assuming each line is like "2019, blah, blah, blah", you can get the ID for each line by calling:
int id = atoi(strtok(line, ","));
If one wants to parse files like,
2409, blah, blah, blah
0x10,foo, bar, baz, qux
# This is more difficult.
010 , a\
a, b b\#\\\,still b,c
one is probably better off just using a parser generator like lex and yacc or my favourite, re2c.
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <limits.h>
#include <assert.h>
/* Tokens. */
#define PARAM(A) A
#define STRINGISE(A) #A
#define TOKENS(X) X(ERROR), X(END), X(COMMA), X(NEWLINE), \
X(ESCAPE), X(WSP), X(NUMBER), X(WORD)
enum Token { TOKENS(PARAM) };
static const char *const tokens[] = { TOKENS(STRINGISE) };
struct Lexer { size_t line; char *marker, *from, *cursor; };
static enum Token lex(struct Lexer *lexer) {
assert(lexer);
/*!re2c
re2c:yyfill:enable = 0;
re2c:define:YYCTYPE = char;
re2c:define:YYCURSOR = lexer->cursor;
re2c:define:YYMARKER = lexer->marker; // Rules overlap.
newline = "\n" | ("\r" "\n"?);
oct = "0" [0-7]*;
dec = [1-9][0-9]*;
hex = '0x' [0-9a-fA-F]+;
num = oct | dec | hex;
word = [^\x00\\\n\r \t\v\f,0-9]+;
comment = "#" [^\x00\n\r]* newline;
*/
scan:
lexer->from = lexer->cursor;
/*!re2c
* { return ERROR; }
"\x00" { return END; }
[ \t\v\f]+ { return WSP; }
newline { lexer->line++; return NEWLINE; }
"\\\n" | comment { lexer->line++; goto scan; }
"\\\\" | "\\," | "\\ " | "\\n" | "\\#" { return ESCAPE; }
"," { return COMMA; }
word { return WORD; }
num { return NUMBER; }
*/
}
struct Buffer {
char *data;
size_t size, capacity;
};
static char *buffer_reserve(struct Buffer *const buf, const size_t reserve) {
const size_t min = buf->size + reserve;
size_t c = buf->capacity;
char *data;
assert(buf);
if(reserve > (size_t)-1 - buf->size || min > ((size_t)-1 >> 1) + 1)
{ errno = ERANGE; return 0; }
if(min > c) {
if(!c) c = 1;
while(min <= c) c <<= 1;
if(!(data = realloc(buf->data, c))) return 0;
buf->data = data;
buf->capacity = c;
}
return buf->data + buf->size;
}
struct Word { char *start, *end; };
struct Parser {
int id, id_set, first_comma;
size_t num_words;
struct Word words[64]; /* Lazy. */
char *start_words, *end_words;
};
static size_t parser_max_words = sizeof ((struct Parser *)0)->words
/ sizeof *((struct Parser *)0)->words;
static void clear_parser(struct Parser *const parser) {
assert(parser);
parser->id_set = 0;
parser->first_comma = 1;
parser->num_words = 0;
parser->start_words = parser->end_words = 0;
}
static void print_parser(const struct Parser *const parser) {
const struct Word *word = parser->words,
*word_end = parser->words + parser->num_words;
assert(parser && parser->id_set && parser->num_words <= parser_max_words);
printf("#%d: ", parser->id);
for( ; word < word_end; word++) {
if(word != parser->words) printf(", ");
if(!word->start) { printf("<null>"); continue; }
assert(word->start <= word->end);
if(word->start == word->end) { printf("<empty>"); continue; }
printf("<%.*s>", (int)(word->end - word->start), word->start);
}
fputc('\n', stdout);
}
static void expand_word(struct Parser *const parser,
const struct Lexer *const lexer) {
assert(parser && lexer && lexer->from < lexer->cursor);
if(!parser->start_words) {
assert(!parser->end_words);
parser->start_words = lexer->from;
}
parser->end_words = (lexer->from + INT_MAX >= lexer->cursor) ?
lexer->cursor : lexer->from + INT_MAX;
}
static int store_word(struct Parser *const parser) {
struct Word *word;
assert(parser);
if(parser->num_words >= parser_max_words) return errno = EILSEQ, 0;
word = parser->words + parser->num_words++;
word->start = parser->start_words;
word->end = parser->end_words;
parser->start_words = parser->end_words = 0;
return 1;
}
int main(int argc, char **argv) {
const size_t granularity = 1024;
struct Lexer lexer = { 1, 0, 0, 0 };
struct Parser parser;
size_t nread;
struct Buffer buf = { 0, 0, 0 };
char *b;
FILE *fp = 0;
int success = 0, end_of_buffer = 0;
/* Open. */
if(argc != 2) return fprintf(stderr, "Needs filename.\n"), EXIT_FAILURE;
if(!(fp = fopen(argv[1], "r"))) goto catch;
/* Read. */
do {
if(!(b = buffer_reserve(&buf, granularity))) goto catch;
nread = fread(b, 1, granularity, fp);
buf.size += nread;
} while(nread == granularity);
if(ferror(fp)) goto catch;
fclose(fp), fp = 0;
if(!(b = buffer_reserve(&buf, 1))) goto catch;
*b = '\0'; /* Make sure it's a string. */
/* Parse. */
lexer.cursor = buf.data;
clear_parser(&parser);
do {
enum Token tok;
switch((tok = lex(&lexer))) {
case ERROR: goto catch;
case END: end_of_buffer = 1; break;
case COMMA:
if(!parser.id_set) { errno = EILSEQ; goto catch; }
if(parser.first_comma) { parser.first_comma = 0; break; }
if(!store_word(&parser)) goto catch;
break;
case NEWLINE:
if(parser.id_set) {
/* We require at least key, data. */
if(!store_word(&parser)) goto catch;
print_parser(&parser);
clear_parser(&parser);
} else if(parser.start_words) {
errno = EILSEQ; goto catch;
}
break;
case ESCAPE:
if(!parser.id_set) { errno = EILSEQ; goto catch; }
expand_word(&parser, &lexer);
break;
case WSP: break;
case NUMBER:
if(parser.id_set) {
expand_word(&parser, &lexer);
} else {
char *end;
long i = strtol(lexer.from, &end, 0);
if(end != lexer.cursor || i < INT_MIN || i > INT_MAX)
{ errno = EDOM; goto catch; }
parser.id = (int)i;
parser.id_set = 1;
}
break;
case WORD:
expand_word(&parser, &lexer);
break;
}
} while(!end_of_buffer);
success = EXIT_SUCCESS;
goto finally;
catch:
fprintf(stderr, "While on line %lu.\n", (unsigned long)lexer.line);
perror("parsing");
assert(!lexer.from || (lexer.from < lexer.cursor
&& lexer.from + INT_MAX >= lexer.cursor));
if(lexer.from) fprintf(stderr, "While on %.*s.\n",
(int)(lexer.cursor - lexer.from), lexer.from);
finally:
free(buf.data);
if(fp) fclose(fp);
return success;
}
Prints,
#2409: <blah>, <blah>, <blah>
#16: <foo>, <bar>, <baz>, <qux>
#8: <a\
a>, <b b\#\\\,still b>, <c>
but that's probably overkill.
As #HAL9000 mentioned, I was able to complete this by using sscanf. Simply extracting the integer and string from the line using sscanf(line, "%d %[^\n]s", &ID, details);
I did try using strtok however, couldn't get my head around it as it wasn't working. sscanf was the easiest to do so this is what I am going to use, thanks.
Using sscanf
e.g
int main(int argc, char *argv[]) {
const char *str = "123, this, is, a test ;##";
char buff[128] = {0};
int num = 0;
if (2 == sscanf(str, "%d,%[^\r\n]s", &num, buff))
printf("== num: %d, string: '%s'\n", num, buff);
else
printf("== Wrong!\n");
return 0;
}
result: == num: 123, string: ' this, is, a test ;##'

Program doesn't segfault only when stepping through debugger (gdb)

This program I wrote to automatically turn my network's auto configuration logic only segfaults if I run through the program without breaking. When running through a debugger with breakpoints, it runs perfectly fine. When run through a debugger without breakpoints, it segfaults. Is there a bug in this program that I'm not noticing?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define COMLEN 64
#define BUFSIZE 128
const char *program_name;
typedef enum errors
{
SUCCESS = 0,
MEM_ERROR,
FILE_ERROR,
USAGE_ERROR,
} Error;
void terminal_error(Error e)
{
switch (e)
{
case MEM_ERROR:
fputs("Memory error!\n", stderr);
break;
case FILE_ERROR:
fputs("File error!\n", stderr);
break;
case USAGE_ERROR:
fprintf(stderr, "Incorrect usage. Try %s [y|n]\n", program_name);
break;
default:
fputs("Unknown error!\n", stderr);
break;
}
exit(e);
}
Error read_file(FILE *fp, char **buffer)
{
long len;
fseek(fp, 0, SEEK_END);
len = ftell(fp);
fseek(fp, 0, SEEK_SET);
if ((*buffer = malloc(len+1)) == NULL)
return MEM_ERROR;
if (fread(*buffer, 1, len, fp) != len)
return FILE_ERROR;
buffer[len+1] = '\0';
return SUCCESS;
}
Error get_interface(char *name)
{
const char *cmd = "netsh wlan show networks";
FILE *fp;
// open pipe to parse output from command and run command
if ((fp = popen(cmd, "rb")) == NULL)
return FILE_ERROR;
// read the piped output into buffer
char *buffer = NULL;
Error res;
res = read_file(fp, &buffer);
if (res != SUCCESS)
return res;
fclose(fp);
// parse the buffer
// needle delimits the start of the interface name in the command
// end_needle delimits the end of the interface name
const char *needle = "Interface name : ";
const char *end_needle = " \r\nThere";
// end will point to end_needle (end of interface name)
char *end = NULL;
// name points to one-past the end of the needle (start of interface
// name)
name = strstr(buffer, needle);
name += strlen(needle);
end = strstr(buffer, end_needle);
*end = '\0'; // terminates the interface name
return SUCCESS;
}
int main(int argc, const char *argv[])
{
const char *template = "netsh wlan set autoconfig enabled=%s interface=\"%s\"";
char interface_name[BUFSIZE] = {0};
program_name = argv[0];
Error res = SUCCESS;
// get name of wireless interfacew
res = get_interface(interface_name);
if (res != SUCCESS)
terminal_error(res);
char *op;
if (argc > 1)
// arguments were provided
{
if (strcmp(argv[1], "y") == 0)
op = "yes";
else if (strcmp(argv[1], "n") == 0)
op = "no";
else
terminal_error(USAGE_ERROR);
char command[COMLEN];
sprintf(command, template, op, interface_name);
system(command);
}
else
// no arguments were provided
{
system("netsh wlan show settings");
}
return 0;
}

Trying to create a program to check palindromes and semordinlap

I'm trying to create a program to check if a given string is a palindrome or an emordinlap (or reverse pair), however when running my program it's outputting that a string is a palindrome but not a reverse pair (such as racecar should be both).
I've been debugging and starring at this compute screen for 3 hours, and I have no idea what's going on. My first instinct was that it's coming from the checkPali function, so I assigned it to pointers, no luck, same issue.
My second guess was to put printf statements everywhere (I cleaned them up), no luck there either.
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
static void *helpPage(void)
{
puts("usage: epi -h -t -s [string] [file]\n");
puts("-h Print this help and exit\n");
puts("-t Run the test strings.\n");
puts("-s Input a string to check if it is either a palindrome or");
puts(" a emordinlap, followed by the path to a file.");;
}
static char *reverse(char *str)
{
// reverse a given string
char tmp, *src, *dst;
size_t len;
if (str != NULL)
{
len = strlen(str);
if (len > 1)
{
src = str;
dst = src + len - 1;
while (src < dst)
{
tmp = *src;
*src++ = *dst;
*dst-- = tmp;
}
}
}
return str;
}
static char *strip(char *s)
{
// strip a string of a new line
return strtok(s, "\n");
}
static bool checkEpi(char *reversed, char *filePath)
{
// check if the word matches in a given file
// or if the word is an emordnilap
FILE *wordList;
char *line = NULL;
size_t len = 0;
ssize_t read;
wordList = fopen(filePath, "r");
if (wordList == NULL)
{
perror("Failed to open file: "); // file probably doesn't exit
}
while ((read = getline(&line, &len, wordList)) != -1) // read the file line by line
{
if (strip(line) == reversed)
{
return true; // return true if the word matches
}
}
fclose(wordList);
}
static bool checkPali(char *origin, char *reversed)
{
// check if a given word is a palindrome or not
if (*origin == *reversed)
return true;
}
static void checkAll(char *origin, char* reverse, char *filePath)
{
// basically a main function to check if it's a palindrome or a emordnilap
bool paliRes = checkPali(origin, reverse);
bool epiRes = checkEpi(reverse, filePath);
if (paliRes == true)
{
printf("\n%s is a palindrome, it is the same forward and backwards\n", origin);
}
else
{
printf("\n%s is not a palindrome, it is not the same forward and backwards\n", origin);
}
if (epiRes == true)
{
printf("Reverse of %s is a emordinlap, it spells a word backwards.\n\n", origin);
}
else
{
printf("Reverse of %s is not a emordinlap, it does not spell a word backwards\n\n", origin);
}
}
int main(int argc, char *argv[])
{
if (argv[1] == NULL)
{
puts("\nYou failed to pass a valid flag...\n");
helpPage();
return 1;
}
else
{
char *testStrings[] = {"a", "ab", "abc", "another", "cbc", "|0|", "palindrome"};
int i;
char s[10000];
char *defaultWordList = "/usr/share/dict/american-english";
size_t optInt;
for (optInt = 1; optInt < argc && argv[optInt][0] == '-'; optInt++)
{
switch(argv[optInt][1])
{
case 't':
{
for (i = 0; i < sizeof(testStrings) / sizeof(testStrings[0]); i++)
{
strcpy(s, testStrings[i]);
char *origin = testStrings[i];
char *revStr = reverse(s);
checkAll(origin, revStr, defaultWordList);
}
return 0;
}
case 's':
{
if (argv[2] == NULL)
{
puts("\nYou must provide a string to test.\n");
helpPage();
return 1;
}
else if (argv[3] == NULL)
{
puts("\nYou must pass a valid file path to use as a wordlist.\n");
helpPage();
return 1;
}
else
{
//strcpy(s, argv[2]);
char *origin = argv[2];
char *revStr = reverse(argv[2]);
checkAll(origin, revStr, argv[3]);
return 0;
}
}
case 'h': helpPage(); return 0;
}
}
return 0;
}
}
What am I doing wrong to where my statements are not comparing correctly?
You can't meaningfully compare strings using == in C.
if (*origin == *reversed)
This just compares the first char of each of the two parameters. Try strcmp instead.
static bool checkPali(char *origin, char *reversed)
{
/* match if result of strcmp is 0 */
return strcmp(origin, reversed) == 0;
}
You will need a similar change for
if (strip(line) == reversed)

How would you print the name of a file when it's read in as a pointer to an array?

So, in this program I need to print the name of the file specified by the user when they run the program ./tstats input.txt. My problem is when I try to print out what the pointer is pointing too I get this: 84 18 407 ffbff2a4. Now I know exactly what the first 3 set of numbers are however that address I know has to with the pointer. I'm aware that my format for the print out needs to be in a string format however you can't print out a pointer in the format of a string. So here lies my question, how would you print out the name of the file by using what code I have now and my pointer to the input_from_args src?
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
enum state
{
START,
WORD,
DELIM,
};
FILE*
input_from_args(int argc, const char *argv[])
{
if (argc == 1) {
return stdin;
}
else {
return fopen(argv[1], "r");
}
}
void
wcount(FILE *src, FILE *dest)
{
int ch, wc, lc, cc;
enum state cstate;
wc = lc = cc = 0;
cstate = START;
while ((ch = fgetc(src)) != EOF){
cc++;
switch (cstate) {
case START:
if (isspace(ch)) {
cstate = DELIM;
if (ch == '\n') {
lc++;
}
}
else {
cstate = WORD;
wc++;
}
break;
case DELIM:
if (ch == '\n') {
lc++;
}
else if (!isspace(ch)) {
cstate = WORD;
wc++;
}
break;
case WORD:
if (isspace(ch)) {
cstate = DELIM;
if (ch == '\n') {
lc++;
}
}
break;
}
}
fprintf(dest, "%4d\t%4d\t%4d\t%10p\n", wc, lc, cc, &src);
}
int
main(int argc, const char *argv[])
{
FILE *src = input_from_args(argc, argv);
FILE *dest = stdout;
if (src == NULL) {
fprintf(stderr, "%s: unable to open %s\n", argv[0], argv[1]);
exit(EXIT_FAILURE);
}
wcount(src, dest);
fclose(src);
return EXIT_SUCCESS;
}
edit Question 2 code:
int
main(int argc, char* argv[])
{
int i;
FILE *src = input_from_args(argc, argv);
FILE *dest = stdout;
for (i = 1; i < argc; i++)
{
if ((src = fopen(argv[i], "r")) == NULL)
{
fprintf(stderr, "%s: unable to open %s\n", argv[0], argv[i]);
}
wcount(src, dest, get_filename_from_args(argc, argv[i]));
fclose(src);
}
return EXIT_SUCCESS;
}
My suggestion:
Create a function that returns the filename to be printed by wcount:
char* get_filename_from_args(int argc, char* argv[])
{
static char stdin_name[] = "-";
if (argc == 1) {
return stdin_name;
}
else {
return argv[1];
}
}
Change the signature of wcount:
void wcount(FILE *src, FILE *dest, char* src_filename)
Change the call to wcount:
wcount(src, dest, get_filename_from_args(argc, argv));
Change the implementation of the line in wcount where you write out the details:
fprintf(dest, "%4d\t%4d\t%4d\t%s\n", wc, lc, cc, src_filename);

How do I handle a stream of data internal to a C-based app?

I am pulling data from a bzip2 stream within a C application. As chunks of data come out of the decompressor, they can be written to stdout:
fwrite(buffer, 1, length, stdout);
This works great. I get all the data when it is sent to stdout.
Instead of writing to stdout, I would like to process the output from this statement internally in one-line-chunks: a string that is terminated with a newline character \n.
Do I write the output of the decompressor stream to another buffer, one character at a time, until I hit a newline, and then call the per-line processing function? Is this slow and is there a smarter approach? Thanks for your advice.
EDIT
Thanks for your suggestions. I ended up creating a pair of buffers that store the remainder (the "stub" at the end of an output buffer) at the beginning of a short line buffer, each time I pass through the output buffer's worth of data.
I loop through the output buffer character by character and process a newline-line's worth of data at a time. The newline-less remainder gets allocated and assigned, and copied to the next stream's line buffer. It seems like realloc is less expensive than repeated malloc-free statements.
Here's the code I came up with:
char bzBuf[BZBUFMAXLEN];
BZFILE *bzFp;
int bzError, bzNBuf;
char bzLineBuf[BZLINEBUFMAXLEN];
char *bzBufRemainder = NULL;
int bzBufPosition, bzLineBufPosition;
bzFp = BZ2_bzReadOpen(&bzError, *fp, 0, 0, NULL, 0); /* http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html#bzcompress-init */
if (bzError != BZ_OK) {
BZ2_bzReadClose(&bzError, bzFp);
fprintf(stderr, "\n\t[gchr2] - Error: Bzip2 data could not be retrieved\n\n");
return -1;
}
bzError = BZ_OK;
bzLineBufPosition = 0;
while (bzError == BZ_OK) {
bzNBuf = BZ2_bzRead(&bzError, bzFp, bzBuf, sizeof(bzBuf));
if (bzError == BZ_OK || bzError == BZ_STREAM_END) {
if (bzBufRemainder != NULL) {
/* fprintf(stderr, "copying bzBufRemainder to bzLineBuf...\n"); */
strncpy(bzLineBuf, bzBufRemainder, strlen(bzBufRemainder)); /* leave out \0 */
bzLineBufPosition = strlen(bzBufRemainder);
}
for (bzBufPosition = 0; bzBufPosition < bzNBuf; bzBufPosition++) {
bzLineBuf[bzLineBufPosition++] = bzBuf[bzBufPosition];
if (bzBuf[bzBufPosition] == '\n') {
bzLineBuf[bzLineBufPosition] = '\0'; /* terminate bzLineBuf */
/* process the line buffer, e.g. print it out or transform it, etc. */
fprintf(stdout, "%s", bzLineBuf);
bzLineBufPosition = 0; /* reset line buffer position */
}
else if (bzBufPosition == (bzNBuf - 1)) {
bzLineBuf[bzLineBufPosition] = '\0';
if (bzBufRemainder != NULL)
bzBufRemainder = (char *)realloc(bzBufRemainder, bzLineBufPosition);
else
bzBufRemainder = (char *)malloc(bzLineBufPosition);
strncpy(bzBufRemainder, bzLineBuf, bzLineBufPosition);
}
}
}
}
if (bzError != BZ_STREAM_END) {
BZ2_bzReadClose(&bzError, bzFp);
fprintf(stderr, "\n\t[gchr2] - Error: Bzip2 data could not be uncompressed\n\n");
return -1;
} else {
BZ2_bzReadGetUnused(&bzError, bzFp, 0, 0);
BZ2_bzReadClose(&bzError, bzFp);
}
free(bzBufRemainder);
bzBufRemainder = NULL;
I really appreciate everyone's help. This is working nicely.
I don't think there's a smarter approach (except finding an automata library that already does this for you). Be careful with allocating proper size for the "last line" buffer: if it cannot handle arbitrary length and the input comes from something accessible to third parties, it becomes a security risk.
I've also been working with processing bzip2 data per line, and I found that reading one byte at a time was too slow. This worked better for me:
#include <stdio.h>
#include <stdlib.h>
#include <bzlib.h>
/* gcc -o bz bz.c -lbz2 */
#define CHUNK 128
struct bzdata {
FILE *fp;
BZFILE *bzf;
int bzeof, bzlen, bzpos;
char bzbuf[4096];
};
static int bz2_open(struct bzdata *bz, char *file);
static void bz2_close(struct bzdata *bz);
static int bz2_read_line(struct bzdata *bz, char **line, int *li);
static int bz2_buf(struct bzdata *bz, char **line, int *li, int *ll);
static int
bz2_buf(struct bzdata *bz, char **line, int *li, int *ll)
{
int done = 0;
for (; bz->bzpos < bz->bzlen && done == 0; bz->bzpos++) {
if (*ll + 1 >= *li) {
*li += CHUNK;
*line = realloc(*line, (*li + 1) * sizeof(*(*line)));
}
if ( ((*line)[(*ll)++] = bz->bzbuf[bz->bzpos]) == '\n') {
done = 1;
}
}
if (bz->bzpos == bz->bzlen) {
bz->bzpos = bz->bzlen = 0;
}
(*line)[*ll] = '\0';
return done;
}
static int
bz2_read_line(struct bzdata *bz, char **line, int *li)
{
int bzerr = BZ_OK, done = 0, ll = 0;
if (bz->bzpos) {
done = bz2_buf(bz, line, li, &ll);
}
while (done == 0 && bz->bzeof == 0) {
bz->bzlen = BZ2_bzRead(&bzerr, bz->bzf, bz->bzbuf, sizeof(bz->bzbuf));
if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) {
bz->bzpos = 0;
if (bzerr == BZ_STREAM_END) {
bz->bzeof = 1;
}
done = bz2_buf(bz, line, li, &ll);
} else {
done = -1;
}
}
/* Handle last lines that don't have a line feed */
if (done == 0 && ll > 0 && bz->bzeof) {
done = 1;
}
return done;
}
static int
bz2_open(struct bzdata *bz, char *file)
{
int bzerr = BZ_OK;
if ( (bz->fp = fopen(file, "rb")) &&
(bz->bzf = BZ2_bzReadOpen(&bzerr, bz->fp, 0, 0, NULL, 0)) &&
bzerr == BZ_OK) {
return 1;
}
return 0;
}
static void
bz2_close(struct bzdata *bz)
{
int bzerr;
if (bz->bzf) {
BZ2_bzReadClose(&bzerr, bz->bzf);
bz->bzf = NULL;
}
if (bz->fp) {
fclose(bz->fp);
bz->fp = NULL;
}
bz->bzpos = bz->bzlen = bz->bzeof = 0;
}
int main(int argc, char *argv[]) {
struct bzdata *bz = NULL;
int i, lc, li = 0;
char *line = NULL;
if (argc < 2) {
return fprintf(stderr, "usage: %s file [file ...]\n", argv[0]);
}
if ( (bz = calloc(1, sizeof(*bz))) ) {
for (i = 1; i < argc; i++) {
if (bz2_open(bz, argv[i])) {
for (lc = 0; bz2_read_line(bz, &line, &li) > 0; lc++) {
/* Process line here */
}
printf("%s: lines=%d\n", argv[i], lc);
}
bz2_close(bz);
}
free(bz);
}
if (line) {
free(line);
}
return 0;
}
This would be easy to do using C++'s std::string, but in C it takes some code if you want to do it efficiently (unless you use a dynamic string library).
char *bz_read_line(BZFILE *input)
{
size_t offset = 0;
size_t len = CHUNK; // arbitrary
char *output = (char *)xmalloc(len);
int bzerror;
while (BZ2_bzRead(&bzerror, input, output + offset, 1) == 1) {
if (offset+1 == len) {
len += CHUNK;
output = xrealloc(output, len);
}
if (output[offset] == '\n')
break;
offset++;
}
if (output[offset] == '\n')
output[offset] = '\0'; // strip trailing newline
else if (bzerror != BZ_STREAM_END) {
free(output);
return NULL;
}
return output;
}
(Where xmalloc and xrealloc handle errors internally. Don't forget to free the returned string.)
This is almost an order of magnitude slower than bzcat:
lars#zygmunt:/tmp$ wc foo
1193 5841 42868 foo
lars#zygmunt:/tmp$ bzip2 foo
lars#zygmunt:/tmp$ time bzcat foo.bz2 > /dev/null
real 0m0.010s
user 0m0.008s
sys 0m0.000s
lars#zygmunt:/tmp$ time ./a.out < foo.bz2 > /dev/null
real 0m0.093s
user 0m0.044s
sys 0m0.020s
Decide for yourself whether that's acceptable.
I think you should copy chunks of characters to another buffer until the latest chunk you write contains a new line character. Then you can work on the whole line.
You can save the rest of the buffer (after the '\n') into a temporary and then create a new line from it.

Resources