Intermediate C : String search in a large file - c

I'm writing a 'C' code that stores the TCP payload of captured packets in a file (payload of each packet is separated by multiple "\n" characters). Using C, is it possible to search for a particular string in the file after all the packets are captured?
P.S : The file can be very large, depending upon the number of captured packets.

Read the file line by line and search using strstr.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(void)
{
FILE * fp;
char * line = NULL;
size_t len = 0;
ssize_t read;
char * pos;
int found = -1;
fp = fopen("filename", "r");
if (fp == NULL)
exit(EXIT_FAILURE);
while ((read = getline(&line, &len, fp)) != -1)
{
pos = strstr(line,"search_string");
if(pos != NULL)
{
found = 1;
break;
}
}
if(found==1)
printf("Found");
else
printf("Not Found");
fclose(fp);
if (line)
free(line);
exit(EXIT_SUCCESS);
}

Related

C File Reader from array

I can see the last printf output of y but the fpc turns null.
I suspected for double quotes in fopen function but not could not find a solution: how to fix it?
Part of the code ;
char *y = &arr_lines[1024*2];
FILE *fpc = fopen(y, "r");
if (fpc == NULL) {
printf("Error opening file.\n");
//return -1;
}
printf("TEST %s\n",y);
When I run the code;
Error opening file.
TEST /Users/lessons/AbstractLesson.java
Here is the full code;
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define LINESIZE 1024
int main(void){
char *arr_lines, *line;
char buf_line[LINESIZE];
int num_lines = 0;
char buf[10240];
// open file
FILE *fp = fopen("/tmp/file", "r");
//FILE *fp1 = fopen(arr_lines, "r");
if (fp == NULL) {
printf("Error opening file.\n");
return -1;
}
// get number of lines; from http://stackoverflow.com/a/3837983
while (fgets(buf_line, LINESIZE, fp))
if (!(strlen(buf_line) == LINESIZE-1 && buf_line[LINESIZE-2] != '\n'))
num_lines++;
// allocate memory
arr_lines = (char*)malloc(num_lines * 1024 * sizeof(char));
// read lines
rewind(fp);
num_lines = 0;
line=arr_lines;
while (fgets(line, LINESIZE, fp))
if (!(strlen(line) == LINESIZE-1 && line[LINESIZE-2] != '\n'))
line += LINESIZE;
// print first four lines
char *y = &arr_lines[1024*2];
FILE *fpc = fopen(y, "r");
//FILE *fp1 = fopen(arr_lines, "r");
if (fpc == NULL) {
printf("Error opening file.\n");
//return -1;
}
printf("TEST [%s]\n",y);
//x = &arr_lines[1024*0];
// y = *x;
// finish
fclose(fp);
return 0;
}
Change printf("TEST %s\n", y) to printf("TEST \"%s\"\n", y) so we can see if you have any extra whitespace characters in the filename.
fgets() returns the new line, if it's there. I didn't see where your code clears the newline. Does your path string include the new line?
Beyond that, fopen() is almost certainly working correctly. The only options are 1) The path is not correct, 2) the path has whitespace or other invalid characters, or 3) the file is not available for reading.
If you don't have a new line in your path, then you simply haven't provided enough information to resolve this issue.
I suspect the path, /Users/lessons/AbstractLesson.java, is wrong. It looks like an OSX path and it might be missing the username between Users and lessons.

Unable to read a file and pass into arguments

1) I'm trying to open a file, read the mix data (ints, chars and strings) and store them into args.
1.1) so in the sample.txt is a total of 13 (excluding args[0])
2) Need to read a file from terminal "./myprog.c < sample.txt"
Heres my code and have no idea where i went wrong:
sample.txt:
123 213 110 90 1
hello my friend
boo bleh
a b c
myprog.c:
#include <stdio.h>
int main()
{
int i = 1;
FILE *fstin=fopen(argv[0], "r"); //open the file
if (fstin == NULL) {
puts("Couldn't fopen...");
return -1;
}
//Getting all the inputs from file
while ((fscanf(fstin, "%d", argv[i])) != EOF){
i++;
}
fclose(fstin);
for (i=0; i<10; i++) {
printf("%d\n",argv[i]);
}
return 0;
}
Any help is greatly appreciated!
PS: Would like if anyone could post their complete solution? Will upload unto this post and let everyone have a review of this problem
PPS: Please excuse the poor level of coding as I am a beginner and completely new to C.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int ac, char *av[]){
int i, argc=0;
char **argv=NULL, data[16];
FILE *fstin = stdin;
if(ac == 2){
if(NULL==(fstin = fopen(av[1], "r"))){
puts("Couldn't fopen...");
return -1;
}
}
while (1==fscanf(fstin, "%15s", data)){
argv = realloc(argv, (argc+1)*sizeof(char*));
argv[argc] = malloc(strlen(data)+1);
strcpy(argv[argc++], data);
}
if(ac == 2)
fclose(fstin);
for (i=0; i<argc; ++i) {
printf("%s\n", argv[i]);
}
//deallocate
return 0;
}
You are making mistake at 2nd point where you divert your file to other file which is wrong. Actually you need to first compile and need to make executable.
gcc -o my_prog ./myprog.c -Wall
You need to execute this program as below to read file from c program:
./my_prog ./sample.txt
As you are new to C programming first go to man pages related to file operations.
Solution:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[]) {
//If command line argument is not inserted then stop operation
if (2 != argc) {
printf("Invalid number of arguments : %d\n", argc);
return -1;
}
int size = 0, ret = 0;
char *data = NULL;
FILE *fp = NULL;
//Open file in read mode given from command line argument
if (NULL != (fp = fopen(argv[1], "r")))
{
//Find size of file
fseek(fp, 0L, SEEK_END);
size = ftell(fp);
fseek(fp, 0L, SEEK_SET);
//if file is empty no need to read it.
if (size > 0)
{
//Data pointer which contains file information
data = (char *) calloc(sizeof(char), size);
if (NULL != data)
{
//Read whole file in one statement
fread(data, sizeof(char), size, fp);
printf("File %s is readed successfully\n", argv[1]);
printf("Data:\n");
printf("%s\n", data);
free(data); data = NULL;
}
else
{
perror("memory allocation failed\n");
ret = -1;
}
}
else
{
printf("File %s is empty\n", argv[1]);
}
fclose(fp); fp = NULL;
}
else
{
perror("File open failed\n");
ret = -1;
}
return ret;
}
Now Test it on your setup and if any query please post comments.

CCAN JSON Serialization in C

I'm using JSON library. It's quite light and easy to understand, but I have one problem with json_decode. I'm reading data(JSON) from file:
FILE *instream = fopen("/tmp/file.dat", "r");
char ch;
int count = 0;
do {
ch = getc(instream);
inbuffer[count] = ch;
count++;
} while (!feof(instream) && ch != '\0');
My file look like below, so inbuffer has the same text
[
{
"MBV": 0,
"CRRC": 0,
"LFrei": 0
}
]
I try to decode it to have JsonNode variable
static char *chomp(char *s) //function taken from CCAN JSON example
{
char *e;
if (s == NULL || *s == 0)
return s;
e = strchr(s, 0);
if (e[-1] == '\n')
*--e = 0;
return s;
}
const char *s = chomp(inbuffer);
JsonNode *jin = json_decode(s);
printf("JSON: %s\n", jin);
After I run my program I get
JSON: (null)
Can somebody tell me why json_decode function doesn't want to read JSON formatted file even if the file created using this library?
I don't know the JSON library you ar using but I suspect the error occurs because inbuffer is not correctly NUL terminated. On end of file, getc() returns EOF (-1) what you copy into inbuffer before feof() returns TRUE. I would do:
while( (ch = getc( instream )) != EOF ) {
inbuffer[count] = ch;
count++;
}
inbuffer[count] = '\0';
or just use fread():
count = fread( inbuffer, 1, sizeof( inbuffer ) - 1, stream );
inbuffer[count] = '\0';
This code prints all the key-value pairs found in the first object of the array (your example shows an array containing only one object, which in turn contains three key-value items):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ccan/json/json.h>
int main(int argc, char **argv)
{
FILE *fd;
long filesize;
char *buffer;
JsonNode *jin, *node;
if ((fd = fopen(argv[1], "r")) == NULL) {
perror("Error opening file");
return EXIT_FAILURE;
}
fseek(fd, 0, SEEK_END);
filesize = ftell(fd);
rewind(fd);
buffer = (char *) malloc(sizeof(char) * filesize+1);
if (fread(buffer, sizeof(char), filesize, fd) != filesize) {
fprintf(stderr, "Error reading file\n");
return EXIT_FAILURE;
}
buffer[filesize] = '\0';
jin = json_decode(buffer);
json_foreach(node, jin->children.head)
printf("%s: %g\n", node->key, node->number_);
free(buffer);
return EXIT_SUCCESS;
}
Take into account that the traversal I just did heavily depends on the structure of the JSON you're trying to parse. If it fits your needs, fine, but maybe you'll need to find a more general solution. Take a look at the json.h header file included with the library to understand how the JSON structure is stored in memory once decoded from the file.
Also, in my personal experience, I've found the JSON-C library much easier to use than this one.

Why wont the program read from the 2 argument file?

So the assignment is to implement a substring search program using an input file to be searched from and an input to be searched. I created the following code:
#include <stdio.h>
#include <string.h>
int main(int argc,char *argv[])
{
FILE *fp;
fp = fopen(argv[1],"r");
if (fp == NULL)
{
printf("Error");
return 0;
}
char* tmpp[100];
int count = 0;
char* nexts = argv[2];
char* tmp = fgets(tmpp,100,fp);
while(tmp = strstr(tmp,nexts))
{
count++;
tmp++;
}
printf("%d\n\n",count);
fclose(fp);
return 0;
}
The program compiles but when i go to implement it in the ubuntu terminal as:
echo "aabb" >beta
./a.out beta a
1
Why isnt the program using the first argument (argv[1]) as beta and the second argument (argv[2]) as a correctly?
You should open a file and then read bytes from that file into temporary buffer:
FILE *file = fopen("file", "r");
while (1) {
char buffer[BUFSIZ+1];
size_t nread = fread(buffer, 1, sizeof(buffer)-1, file);
if (nread == 0) break; // read error or EOF
buffer[nread] = 0;
// chunk with BUFSIZ amount of bytes is available via buffer (and is zero-terminated)
}
If you want to search for string/pattern in a file, be aware that looked pattern in file may cross your chunk-size boundary, for example: you look for "hello", and BUFSIZ is 512. File contains "hello" at byte 510. Obviously, if you read by 512, you will get the first chunk ending with "he", and the second chunk starting with "llo". Probability of this situation is nonzero for all chunk sizes (except SIZE_MAX, but that buffer size is impossible by other reasons). Dealing with borders may be very complicated.
Close...but this is closer:
#include <stdio.h>
#include <string.h>
int main(int argc, char *argv[])
{
if (argc != 3)
{
fprintf(stderr, "Usage: %s file pattern\n", argv[0]);
return 1;
}
FILE *fp = fopen(argv[1], "r");
if (fp == NULL)
{
fprintf(stderr, "Error: failed to open file %s for reading\n", argv[1]);
return 1;
}
char tmpp[1000];
int count = 0;
char* nexts = argv[2];
while (fgets(tmpp, sizeof(tmpp), fp) != 0)
{
char *tmp = tmpp;
while ((tmp = strstr(tmp, nexts)) != 0)
{
count++;
tmp++;
}
}
printf("%d\n", count);
fclose(fp);
return 0;
}
The main difference is that this loops reading multiple lines from the input file. Yours would only work on files with a single line of input.

How can I read an XML file into a buffer in C?

I want to read an XML file into a char *buffer using C.
What is the best way to do this?
How should I get started?
And if you want to parse XML, not just reading it into a buffer (something which would not be XML-specific, see Christoph's and Baget's answers), you can use for instance libxml2:
#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>
int main(int argc, char **argv) {
xmlDoc *document;
xmlNode *root, *first_child, *node;
char *filename;
if (argc < 2) {
fprintf(stderr, "Usage: %s filename.xml\n", argv[0]);
return 1;
}
filename = argv[1];
document = xmlReadFile(filename, NULL, 0);
root = xmlDocGetRootElement(document);
fprintf(stdout, "Root is <%s> (%i)\n", root->name, root->type);
first_child = root->children;
for (node = first_child; node; node = node->next) {
fprintf(stdout, "\t Child is <%s> (%i)\n", node->name, node->type);
}
fprintf(stdout, "...\n");
return 0;
}
On an Unix machine, you typically compile the above with:
% gcc -o read-xml $(xml2-config --cflags) -Wall $(xml2-config --libs) read-xml.c
Is reading the contents of the file into a single, simple buffer really what you want to do? XML files are generally there to be parsed, and you can do this with a library like libxml2, just to give one example (but notably, is implemented in C).
Hopefully bug-free ISO-C code to read the contents of a file and add a '\0' char:
#include <stdlib.h>
#include <stdio.h>
long fsize(FILE * file)
{
if(fseek(file, 0, SEEK_END))
return -1;
long size = ftell(file);
if(size < 0)
return -1;
if(fseek(file, 0, SEEK_SET))
return -1;
return size;
}
size_t fget_contents(char ** str, const char * name, _Bool * error)
{
FILE * file = NULL;
size_t read = 0;
*str = NULL;
if(error) *error = 1;
do
{
file = fopen(name, "rb");
if(!file) break;
long size = fsize(file);
if(size < 0) break;
if(error) *error = 0;
*str = malloc((size_t)size + 1);
if(!*str) break;
read = fread(*str, 1, (size_t)size, file);
(*str)[read] = 0;
*str = realloc(*str, read + 1);
if(error) *error = (size != (long)read);
}
while(0);
if(file) fclose(file);
return read;
}
Install libxml2 as a NuGet package in Visual studio(I am using Vs 2015 to test this)
Copy and paste the contents under example XML file in a notepad and save the file as example.xml
Copy and past the code under //xml parsing in to Vs
Call the function from main with xml file name as an argument
You will be getting the xml data in configReceive
That's all...
example XML file:
<?xml version="1.0" encoding="utf-8"?>
<config>
<xmlConfig value1="This is a simple XML parsing program in C"/>
<xmlConfig value2="Thank you : Banamali Mishra"/>
<xmlConfig value3="2000000"/>
<xmlConfig value4="80"/>
<xmlConfig value5="10"/>
<xmlConfig value6="1"/>
</config>
Here is the source code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/xmlreader.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
char configReceive[6][80] = { " " };
//xml parsing
void ParsingXMLFile(char *filename) {
char *docname;
xmlDocPtr doc;
xmlNodePtr cur;
xmlChar *uri;
char config[6][80] = { "value1", "value2", "value3", "value4", "value5", "value6" };
int count = 0;
int count1 = 0;
docname = filename;
doc = xmlParseFile(docname);
cur = xmlDocGetRootElement(doc);
cur = cur->xmlChildrenNode;
while (cur != NULL) {
if ((!xmlStrcmp(cur->name, (const xmlChar *)"xmlConfig"))) {
uri = xmlGetProp(cur, (xmlChar *)config[count++]);
strcpy(configReceive[count1++], (char *)uri);
xmlFree(uri);
}
cur = cur->next;
}
count = 0;
count1 = 0;
xmlFreeDoc(doc);
}
You can use the stat() function to get the file size. then allocate a buffer using malloc after it reading the file using fread.
the code will be something like that:
struct stat file_status;
char *buf = NULL;
FILE * pFile;
stat("tmp.xml", &file_status);
buf = (char*)malloc(file_status.st_size);
pFile = fopen ("tmp.xml","r");
fread (buf,1,file_status.st_size,pFile);
fclose(pFile);
Here is a full program that reads in a whole XML file (really, any file), into a buffer. It includes about as much error-checking as would be useful.
N.B. everything is done in main(). Turning it into a callable function is left as an exercise for the reader.
(Tested, compiled with GCC 4.3.3. Switches were -Wall -W --pedantic --ansi.)
Comments on this will be addressed in approximately eight hours.
#include <stdio.h>
#include <stdlib.h>
int main (int argc, char *argv[]) {
char *buffer; /* holds the file contents. */
size_t i; /* indexing into buffer. */
size_t buffer_size; /* size of the buffer. */
char *temp; /* for realloc(). */
char c; /* for reading from the input. */
FILE *input; /* our input stream. */
if (argc == 1) {
fprintf(stderr, "Needs a filename argument.\n");
exit(EXIT_FAILURE);
}
else if (argc > 2) {
fprintf(stderr, "Well, you passed in a few filenames, but I'm only using %s\n", argv[1]);
}
if ((input = fopen(argv[1], "r")) == NULL) {
fprintf(stderr, "Error opening input file %s\n", argv[1]);
exit(EXIT_FAILURE);
}
/* Initial allocation of buffer */
i = 0;
buffer_size = BUFSIZ;
if ((buffer = malloc(buffer_size)) == NULL) {
fprintf(stderr, "Error allocating memory (before reading file).\n");
fclose(input);
}
while ((c = fgetc(input)) != EOF) {
/* Enlarge buffer if necessary. */
if (i == buffer_size) {
buffer_size += BUFSIZ;
if ((temp = realloc(buffer, buffer_size)) == NULL) {
fprintf(stderr, "Ran out of core while reading file.\n");
fclose(input);
free(buffer);
exit(EXIT_FAILURE);
}
buffer = temp;
}
/* Add input char to the buffer. */
buffer[i++] = c;
}
/* Test if loop terminated from error. */
if (ferror(input)) {
fprintf(stderr, "There was a file input error.\n");
free(buffer);
fclose(input);
exit(EXIT_FAILURE);
}
/* Make the buffer a bona-fide string. */
if (i == buffer_size) {
buffer_size += 1;
if ((temp = realloc(buffer, buffer_size)) == NULL) {
fprintf(stderr, "Ran out of core (and only needed one more byte too ;_;).\n");
fclose(input);
free(buffer);
exit(EXIT_FAILURE);
}
buffer = temp;
}
buffer[i] = '\0';
puts(buffer);
/* Clean up. */
free(buffer);
fclose(input);
return 0;
}
I believe that question was about XML parsing and not about file reading, however OP should really clarify this.
Any way you got plenty example how to read file.
Another option to xml parsing in additional to sgm suggestion will be Expat library
Suggestion: Use memory mapping
This has the potential to cut down on useless copying of the data. The trick is to ask the OS for what you want, instead of doing it. Here's an implementation I made earlier:
mmap.h
#ifndef MMAP_H
#define MMAP_H
#include <sys/types.h>
struct region_t {
void *head;
off_t size;
};
#define OUT_OF_BOUNDS(reg, p) \
(((void *)(p) < (reg)->head) || ((void *)(p) >= ((reg)->head)+(reg)->size))
#define REG_SHOW(reg) \
printf("h: %p, s: %ld (e: %p)\n", reg->head, reg->size, reg->head+reg->size);
struct region_t *do_mmap(const char *fn);
#endif
mmap.c
#include <stdlib.h>
#include <sys/types.h> /* open lseek */
#include <sys/stat.h> /* open */
#include <fcntl.h> /* open */
#include <unistd.h> /* lseek */
#include <sys/mman.h> /* mmap */
#include "mmap.h"
struct region_t *do_mmap(const char *fn)
{
struct region_t *R = calloc(1, sizeof(struct region_t));
if(R != NULL) {
int fd;
fd = open(fn, O_RDONLY);
if(fd != -1) {
R->size = lseek(fd, 0, SEEK_END);
if(R->size != -1) {
R->head = mmap(NULL, R->size, PROT_READ, MAP_PRIVATE, fd, 0);
if(R->head) {
close(fd); /* don't need file-destructor anymore. */
return R;
}
/* no clean up of borked (mmap,) */
}
close(fd); /* clean up of borked (lseek, mmap,) */
}
free(R); /* clean up of borked (open, lseek, mmap,) */
}
return NULL;
}

Resources