How can I read an XML file into a buffer in C? - c

I want to read an XML file into a char *buffer using C.
What is the best way to do this?
How should I get started?

And if you want to parse XML, not just reading it into a buffer (something which would not be XML-specific, see Christoph's and Baget's answers), you can use for instance libxml2:
#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>
int main(int argc, char **argv) {
xmlDoc *document;
xmlNode *root, *first_child, *node;
char *filename;
if (argc < 2) {
fprintf(stderr, "Usage: %s filename.xml\n", argv[0]);
return 1;
}
filename = argv[1];
document = xmlReadFile(filename, NULL, 0);
root = xmlDocGetRootElement(document);
fprintf(stdout, "Root is <%s> (%i)\n", root->name, root->type);
first_child = root->children;
for (node = first_child; node; node = node->next) {
fprintf(stdout, "\t Child is <%s> (%i)\n", node->name, node->type);
}
fprintf(stdout, "...\n");
return 0;
}
On an Unix machine, you typically compile the above with:
% gcc -o read-xml $(xml2-config --cflags) -Wall $(xml2-config --libs) read-xml.c

Is reading the contents of the file into a single, simple buffer really what you want to do? XML files are generally there to be parsed, and you can do this with a library like libxml2, just to give one example (but notably, is implemented in C).

Hopefully bug-free ISO-C code to read the contents of a file and add a '\0' char:
#include <stdlib.h>
#include <stdio.h>
long fsize(FILE * file)
{
if(fseek(file, 0, SEEK_END))
return -1;
long size = ftell(file);
if(size < 0)
return -1;
if(fseek(file, 0, SEEK_SET))
return -1;
return size;
}
size_t fget_contents(char ** str, const char * name, _Bool * error)
{
FILE * file = NULL;
size_t read = 0;
*str = NULL;
if(error) *error = 1;
do
{
file = fopen(name, "rb");
if(!file) break;
long size = fsize(file);
if(size < 0) break;
if(error) *error = 0;
*str = malloc((size_t)size + 1);
if(!*str) break;
read = fread(*str, 1, (size_t)size, file);
(*str)[read] = 0;
*str = realloc(*str, read + 1);
if(error) *error = (size != (long)read);
}
while(0);
if(file) fclose(file);
return read;
}

Install libxml2 as a NuGet package in Visual studio(I am using Vs 2015 to test this)
Copy and paste the contents under example XML file in a notepad and save the file as example.xml
Copy and past the code under //xml parsing in to Vs
Call the function from main with xml file name as an argument
You will be getting the xml data in configReceive
That's all...
example XML file:
<?xml version="1.0" encoding="utf-8"?>
<config>
<xmlConfig value1="This is a simple XML parsing program in C"/>
<xmlConfig value2="Thank you : Banamali Mishra"/>
<xmlConfig value3="2000000"/>
<xmlConfig value4="80"/>
<xmlConfig value5="10"/>
<xmlConfig value6="1"/>
</config>
Here is the source code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/xmlreader.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
char configReceive[6][80] = { " " };
//xml parsing
void ParsingXMLFile(char *filename) {
char *docname;
xmlDocPtr doc;
xmlNodePtr cur;
xmlChar *uri;
char config[6][80] = { "value1", "value2", "value3", "value4", "value5", "value6" };
int count = 0;
int count1 = 0;
docname = filename;
doc = xmlParseFile(docname);
cur = xmlDocGetRootElement(doc);
cur = cur->xmlChildrenNode;
while (cur != NULL) {
if ((!xmlStrcmp(cur->name, (const xmlChar *)"xmlConfig"))) {
uri = xmlGetProp(cur, (xmlChar *)config[count++]);
strcpy(configReceive[count1++], (char *)uri);
xmlFree(uri);
}
cur = cur->next;
}
count = 0;
count1 = 0;
xmlFreeDoc(doc);
}

You can use the stat() function to get the file size. then allocate a buffer using malloc after it reading the file using fread.
the code will be something like that:
struct stat file_status;
char *buf = NULL;
FILE * pFile;
stat("tmp.xml", &file_status);
buf = (char*)malloc(file_status.st_size);
pFile = fopen ("tmp.xml","r");
fread (buf,1,file_status.st_size,pFile);
fclose(pFile);

Here is a full program that reads in a whole XML file (really, any file), into a buffer. It includes about as much error-checking as would be useful.
N.B. everything is done in main(). Turning it into a callable function is left as an exercise for the reader.
(Tested, compiled with GCC 4.3.3. Switches were -Wall -W --pedantic --ansi.)
Comments on this will be addressed in approximately eight hours.
#include <stdio.h>
#include <stdlib.h>
int main (int argc, char *argv[]) {
char *buffer; /* holds the file contents. */
size_t i; /* indexing into buffer. */
size_t buffer_size; /* size of the buffer. */
char *temp; /* for realloc(). */
char c; /* for reading from the input. */
FILE *input; /* our input stream. */
if (argc == 1) {
fprintf(stderr, "Needs a filename argument.\n");
exit(EXIT_FAILURE);
}
else if (argc > 2) {
fprintf(stderr, "Well, you passed in a few filenames, but I'm only using %s\n", argv[1]);
}
if ((input = fopen(argv[1], "r")) == NULL) {
fprintf(stderr, "Error opening input file %s\n", argv[1]);
exit(EXIT_FAILURE);
}
/* Initial allocation of buffer */
i = 0;
buffer_size = BUFSIZ;
if ((buffer = malloc(buffer_size)) == NULL) {
fprintf(stderr, "Error allocating memory (before reading file).\n");
fclose(input);
}
while ((c = fgetc(input)) != EOF) {
/* Enlarge buffer if necessary. */
if (i == buffer_size) {
buffer_size += BUFSIZ;
if ((temp = realloc(buffer, buffer_size)) == NULL) {
fprintf(stderr, "Ran out of core while reading file.\n");
fclose(input);
free(buffer);
exit(EXIT_FAILURE);
}
buffer = temp;
}
/* Add input char to the buffer. */
buffer[i++] = c;
}
/* Test if loop terminated from error. */
if (ferror(input)) {
fprintf(stderr, "There was a file input error.\n");
free(buffer);
fclose(input);
exit(EXIT_FAILURE);
}
/* Make the buffer a bona-fide string. */
if (i == buffer_size) {
buffer_size += 1;
if ((temp = realloc(buffer, buffer_size)) == NULL) {
fprintf(stderr, "Ran out of core (and only needed one more byte too ;_;).\n");
fclose(input);
free(buffer);
exit(EXIT_FAILURE);
}
buffer = temp;
}
buffer[i] = '\0';
puts(buffer);
/* Clean up. */
free(buffer);
fclose(input);
return 0;
}

I believe that question was about XML parsing and not about file reading, however OP should really clarify this.
Any way you got plenty example how to read file.
Another option to xml parsing in additional to sgm suggestion will be Expat library

Suggestion: Use memory mapping
This has the potential to cut down on useless copying of the data. The trick is to ask the OS for what you want, instead of doing it. Here's an implementation I made earlier:
mmap.h
#ifndef MMAP_H
#define MMAP_H
#include <sys/types.h>
struct region_t {
void *head;
off_t size;
};
#define OUT_OF_BOUNDS(reg, p) \
(((void *)(p) < (reg)->head) || ((void *)(p) >= ((reg)->head)+(reg)->size))
#define REG_SHOW(reg) \
printf("h: %p, s: %ld (e: %p)\n", reg->head, reg->size, reg->head+reg->size);
struct region_t *do_mmap(const char *fn);
#endif
mmap.c
#include <stdlib.h>
#include <sys/types.h> /* open lseek */
#include <sys/stat.h> /* open */
#include <fcntl.h> /* open */
#include <unistd.h> /* lseek */
#include <sys/mman.h> /* mmap */
#include "mmap.h"
struct region_t *do_mmap(const char *fn)
{
struct region_t *R = calloc(1, sizeof(struct region_t));
if(R != NULL) {
int fd;
fd = open(fn, O_RDONLY);
if(fd != -1) {
R->size = lseek(fd, 0, SEEK_END);
if(R->size != -1) {
R->head = mmap(NULL, R->size, PROT_READ, MAP_PRIVATE, fd, 0);
if(R->head) {
close(fd); /* don't need file-destructor anymore. */
return R;
}
/* no clean up of borked (mmap,) */
}
close(fd); /* clean up of borked (lseek, mmap,) */
}
free(R); /* clean up of borked (open, lseek, mmap,) */
}
return NULL;
}

Related

Segmentation fault in C while iterating through array

I am trying to mimic a basic encryption algorithm. I try to read the encrypted data file and look for each corresponding value in a JRB tree (simple key-value pair in my case) which is basically filled from the ".key" file and then write it to another file.
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "fields.h"
#include <cJSON.h>
#include "jrb.h"
void decrypt();
int main(int argc, char **argv)
{
decrypt();
return 0;
}
void decrypt()
{
IS is_input;
FILE *fp;
int i, j;
char *buffer = 0;
char *str = 0;
long length;
FILE *f = fopen ("data/.key", "rb");
cJSON *json;
JRB b, tmp;
b = make_jrb();
tmp = make_jrb();
is_input = new_inputstruct("data/encrypted");
if (is_input == NULL) {
perror("Error: ");
exit(1);
}
fp = fopen("data/decrypted.txt", "w+");
if (fp < 0) { perror("Error: "); exit(1); }
if (f)
{
fseek (f, 0, SEEK_END);
length = ftell (f);
fseek (f, 0, SEEK_SET);
buffer = malloc (length);
if (buffer)
{
fread (buffer, 1, length, f);
}
fclose (f);
}
if (buffer)
{
json = cJSON_Parse(buffer);
cJSON *current_element = NULL;
char *current_key = NULL;
cJSON_ArrayForEach(current_element, json)
{
current_key = current_element->string;
if (current_key != NULL)
{
(void) jrb_insert_str(b, strdup(current_element->valuestring), new_jval_v(current_key));
}
}
while(get_line(is_input) >= 0) {
for (i = 0; i < is_input->NF; i++) {
str = is_input->fields[i];
tmp = jrb_find_str(b, "10001011"); // This works but when I use "str" here instead of "10001011", I get a segmentation fault.
// tmp = jrb_find_str(b, str);
fprintf(fp, "%s ", tmp->val.s);
}
}
}
jettison_inputstruct(is_input);
fclose(fp);
return;
}
The .key file is like this:
{
"hi": "0",
"merhaba": "10",
"hallo": "11"
}
After running the program with printf I get a Segmentation fault after printing it is data like this:
0 10 11Segmentation fault (core dumped)
But if I try to use fprintf to write it into another file I directly get the Segmentation fault error.
I tried to debug and I see that the tmp value is null but how can it be null?
About JRB: http://web.eecs.utk.edu/~jplank/plank/classes/cs360/360/notes/JRB/index.html
Input struct:
const char *name; /* File name */
FILE *f; /* File descriptor */
int line; /* Line number */
char text1[MAXLEN]; /* The line */
char text2[MAXLEN]; /* Working -- contains fields */
int NF; /* Number of fields */
char *fields[MAXFIELDS]; /* Pointers to fields */
int file; /* 1 for file, 0 for popen */

Intermediate C : String search in a large file

I'm writing a 'C' code that stores the TCP payload of captured packets in a file (payload of each packet is separated by multiple "\n" characters). Using C, is it possible to search for a particular string in the file after all the packets are captured?
P.S : The file can be very large, depending upon the number of captured packets.
Read the file line by line and search using strstr.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(void)
{
FILE * fp;
char * line = NULL;
size_t len = 0;
ssize_t read;
char * pos;
int found = -1;
fp = fopen("filename", "r");
if (fp == NULL)
exit(EXIT_FAILURE);
while ((read = getline(&line, &len, fp)) != -1)
{
pos = strstr(line,"search_string");
if(pos != NULL)
{
found = 1;
break;
}
}
if(found==1)
printf("Found");
else
printf("Not Found");
fclose(fp);
if (line)
free(line);
exit(EXIT_SUCCESS);
}

parsing a file using fgets()

There is probably more issues at hand, but for now my problem is that when I compile and run this like so:
cc -o parser parser.c
./parser
I expect it to open a particular file, read from it, and parse it. However, it seems to expect me to provide input and I have to Ctrl-C to kill it. Am I using fgets wrong? I tried getline() with the same results. I added the puts() to make sure it was reading what I expected and it does. Any help is appreciated.
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
int main(int argc, char *argv[]) {
FILE *fp;
char buf[1024];
char *tmp, *pattern, *dir;
char *skip, *p;
char *tok[5];
char **ap;
size_t sz = 0;
ssize_t len;
int i;
int action = 0; // placeholder
int fileinto = 1; // placeholder
char path[PATH_MAX] = "/home/edgar/.patfile";
fp = fopen(path, "r");
if (fp == NULL)
fprintf(stderr, "fopen failed");
while (fgets(buf, sizeof(buf), fp) != NULL) {
buf[strcspn(buf, "\n")] = '\0';
// skip comments
for (skip = buf; *skip; ++skip) {
if (*skip == '#') {
*skip = '\0';
break;
}
}
// skip empty lines
if (strlen(buf) == 0)
continue;
puts(buf); // debug only
// make a copy
tmp = strdup(buf);
for (i = 0, ap = tok; ap < &tok[4] && (*ap = strsep(&tmp, " ")) != NULL; i++) {
if (**ap != '\0')
ap++;
}
while (i >= 0) {
if(tok[i] == "match")
pattern = tok[i + 1];
if(tok[i] == "fileinto") {
action = fileinto;
dir = tok[i + 1];
}
}
}
free(tmp);
fclose(fp);
exit(0);
}

String search C program for command prompt

I wrote C program of searching string. The problem is MyStrstr() function doesn't work with
command prompt. It only works with IDE. So, can anyone advise me how to fix the code for working with command prompt. With regards...
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define ARGUMENT_COUNT 3
#define FILE_INDEX 2
#define SEARCH_INDEX 1
#define BUFFER 256
#define SUCCESS 0
#define ERRCODE_PARAM 1
#define ERRCODE_FILENAME 2
#define MSG_USAGE "String Search Program === EXER5 === by Newbie\nUsage: %s Search_String fileName"
#define MSG_ERROR "Can not open file. [%s]"
char* MyStrstr(char* pszSearchString, char* pszSearchWord);
int main(int argc, char* argv[])
{
FILE* pFile = NULL;
char szData[BUFFER];
char* pszCutString = NULL;
if(argc != ARGUMENT_COUNT) {
printf(MSG_USAGE, argv[0]);
return ERRCODE_PARAM;
}
pFile = fopen(argv[FILE_INDEX], "r");
if(pFile == NULL) {
printf(MSG_ERROR, argv[FILE_INDEX]);
return ERRCODE_FILENAME;
}
pszCutString = MyStrstr(szData, argv[SEARCH_INDEX]);
if(pszCutString != NULL) {
printf("%s", pszCutString);
}
fclose(pFile);
pFile = NULL;
return SUCCESS;
}
char* MyStrstr(char* pszSearchString, char* pszSearchWord) {
int nFcount = 0;
int nScount = 0;
int nSearchLen = 0;
int nIndex = 0;
char* pszDelString = NULL;
char cSLen = 0;
size_t len = 0;
if(pszSearchString == NULL || pszSearchWord == NULL) {
return NULL;
}
while(pszSearchWord[nSearchLen] != '\0') {
nSearchLen++;
}
if(nSearchLen <= 0){
return pszSearchString;
}
cSLen = *pszSearchWord++;
if (!cSLen) {
return (char*) pszSearchString;
}
len = strlen(pszSearchWord);
do {
char cMLength;
do {
cMLength = *pszSearchString++;
if (!cMLength)
return (char *) 0;
} while (cMLength != cSLen);
} while (strncmp(pszSearchString, pszSearchWord, len) != 0);
return (char *) (pszSearchString - 1);
}
You want to open a file, search the contents of that file for a string and return/print that. You are instead doing:
char szData[256]; // <-- making an uninitialized buffer
char* pszCutString = NULL;
pFile = fopen(argv[2], "r"); // <-- Opening a file
pszCutString = MyStrstr(szData, argv[1]); // <-- searching the buffer
if(pszCutString != NULL) {
printf("%s", pszCutString);
}
fclose(pFile); // <-- Closing the file
So you never fill your buffer szData with the contents of the file noted in argv[2]. You're trying to search an uninitialized buffer for a string. You're luck the result is just "no output comes out".
You need to take the contents of the file in argv[2] and place it in the buffer szData then do the search. This could be accomplished by adding a call to a function like read() or fscanf()
Note 1:
I assume when you say this "worked" in the IDE, the code was a little different and you weren't using the command line arguments.
Note 2:
you should also check to fopen() worked before trying to read from/close pFile, and if your file is possibly larger than 256 characters you will need to change your code to either have a dynamically sized string, or you'll need to loop the buffer fills (but then you have to worry about breaking a word apart), or some other mechanism to check the full file.

Unable to read a file and pass into arguments

1) I'm trying to open a file, read the mix data (ints, chars and strings) and store them into args.
1.1) so in the sample.txt is a total of 13 (excluding args[0])
2) Need to read a file from terminal "./myprog.c < sample.txt"
Heres my code and have no idea where i went wrong:
sample.txt:
123 213 110 90 1
hello my friend
boo bleh
a b c
myprog.c:
#include <stdio.h>
int main()
{
int i = 1;
FILE *fstin=fopen(argv[0], "r"); //open the file
if (fstin == NULL) {
puts("Couldn't fopen...");
return -1;
}
//Getting all the inputs from file
while ((fscanf(fstin, "%d", argv[i])) != EOF){
i++;
}
fclose(fstin);
for (i=0; i<10; i++) {
printf("%d\n",argv[i]);
}
return 0;
}
Any help is greatly appreciated!
PS: Would like if anyone could post their complete solution? Will upload unto this post and let everyone have a review of this problem
PPS: Please excuse the poor level of coding as I am a beginner and completely new to C.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int ac, char *av[]){
int i, argc=0;
char **argv=NULL, data[16];
FILE *fstin = stdin;
if(ac == 2){
if(NULL==(fstin = fopen(av[1], "r"))){
puts("Couldn't fopen...");
return -1;
}
}
while (1==fscanf(fstin, "%15s", data)){
argv = realloc(argv, (argc+1)*sizeof(char*));
argv[argc] = malloc(strlen(data)+1);
strcpy(argv[argc++], data);
}
if(ac == 2)
fclose(fstin);
for (i=0; i<argc; ++i) {
printf("%s\n", argv[i]);
}
//deallocate
return 0;
}
You are making mistake at 2nd point where you divert your file to other file which is wrong. Actually you need to first compile and need to make executable.
gcc -o my_prog ./myprog.c -Wall
You need to execute this program as below to read file from c program:
./my_prog ./sample.txt
As you are new to C programming first go to man pages related to file operations.
Solution:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[]) {
//If command line argument is not inserted then stop operation
if (2 != argc) {
printf("Invalid number of arguments : %d\n", argc);
return -1;
}
int size = 0, ret = 0;
char *data = NULL;
FILE *fp = NULL;
//Open file in read mode given from command line argument
if (NULL != (fp = fopen(argv[1], "r")))
{
//Find size of file
fseek(fp, 0L, SEEK_END);
size = ftell(fp);
fseek(fp, 0L, SEEK_SET);
//if file is empty no need to read it.
if (size > 0)
{
//Data pointer which contains file information
data = (char *) calloc(sizeof(char), size);
if (NULL != data)
{
//Read whole file in one statement
fread(data, sizeof(char), size, fp);
printf("File %s is readed successfully\n", argv[1]);
printf("Data:\n");
printf("%s\n", data);
free(data); data = NULL;
}
else
{
perror("memory allocation failed\n");
ret = -1;
}
}
else
{
printf("File %s is empty\n", argv[1]);
}
fclose(fp); fp = NULL;
}
else
{
perror("File open failed\n");
ret = -1;
}
return ret;
}
Now Test it on your setup and if any query please post comments.

Resources