How to unpack a msgpack file? - c

I am writing msgpack-encoded data to a file. On writing, I am just using the fbuffer of the C API. As in (I striped all error handling for the example):
FILE *fp = fopen(filename, "ab");
msgpack_packer pk;
msgpack_packer_init(pk, fp, msgpack_fbuffer_write);
msgpack_pack_int(pk, 42);
// more data ...
How do I read this file back in? All the example I found assume that the data is in memory, however, my files are up to 5GB, it is not exactly a good idea to hold this in memory completely. Also I do not want to read in chunks myself. After all, I do not know how long the msgpack objects are, so chances are I end up with half an integer in my buffer.
Can msgpack's unpack somehow read from disk directly? Or is there some standard pattern to do this?

You might consider using "msgpack_unpacker" for that instead, which seems to be the official way that MessagePack implements a 'streaming' deserializer. Have a look at msgpack-c/example/c/lib_buffer_unpack.c
Regards, NiteHawk

Okay, I managed to do it.
Here is how to write:
#include <stdlib.h>
#include <msgpack.h>
#include <msgpack/fbuffer.h>
int main(int argc, char **argv) {
if(2 != argc) {
fprintf(stderr, "Call all writeFile <file>");
return;
}
FILE *fp = fopen(argv[1], "ab");
msgpack_packer pk;
msgpack_packer_init(&pk, fp, msgpack_fbuffer_write);
for(int i=0;i<2048;i++) {
msgpack_pack_int(&pk, i);
}
fclose(fp);
}
And this is what the read looks like:
#include <stdlib.h>
#include <msgpack.h>
static const int BUFFERSIZE = 2048;
int main(int argc, char **argv) {
if(2 != argc) {
fprintf(stderr, "Call with readFile <file>");
return 1;
}
char *inbuffer = (char *) malloc(BUFFERSIZE);
if(NULL == inbuffer) {
fprintf(stderr, "Out of memory!");
return 1;
}
FILE *fp = fopen(argv[1], "rb");
size_t off = 0;
size_t read = 0;
msgpack_unpacked unpacked;
msgpack_unpacked_init(&unpacked);
do {
read = fread(inbuffer, sizeof(char), BUFFERSIZE - off, fp);
off = 0;
while(msgpack_unpack_next(&unpacked, inbuffer, read, &off)) {
msgpack_object_print(stdout, unpacked.data);
puts("");
}
memcpy(inbuffer, &(inbuffer[off]), read-off);
off = read - off;
} while(read != 0);
free(inbuffer);
fclose(fp);
msgpack_unpacked_destroy(&unpacked);
return 0;
}
I did not try, but I think it will work with larger objects (arrays, maps etc.) as well.

Related

Using od command

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int main(int argc, char* argv[]) {
char *filename = argv[1];
char *store = malloc(2);
FILE *fh = fopen(filename, "wb");
for(int i = 0; i < 100; i++) {
sprintf(store, "%u", i);
if (fh != NULL) {
fwrite (store, sizeof (store), 1, fh);
}
}
fclose (fh);
return 0;
}
I want my output to look like this -> https://imgur.com/a/nt2ly. The output it produces currently is all garabge.
The real reason for the garbage is the size of your data on the fwrite statement
fwrite (store, sizeof (store), 1, fh);
sizeof(store) is not the size of the string. It's the size of the pointer.
Aside, allocating 2 bytes for store is wrong. You're forgetting that a 2-digit number as a string needs space for a nul-terminator, so you're writing one char too many.
More minor issue: why testing the handle against NULL in the loop? you could exit in that case.
Also test the argument length (argc).
int main(int argc, char* argv[]) {
if (argc<2) exit(1); // protect against missing arg
char *filename = argv[1];
char store[50]; // use auto memory, faster & simpler, don't be shy on the size, don't shave it too close
FILE *fh = fopen(filename, "wb");
if (fh != NULL) { // test file handle here, not in the loop
for(int i = 0; i < 100; i++) {
// sprintf returns the number of printed chars, use this
// also use the proper format specifier for int
int nb_printed = sprintf(store, "%d", i);
// you may want to check the return value of fwrite...
fwrite (store, nb_printed, 1, fh);
}
fclose (fh);
return 0;
}
Note that this code will create a binary file with all numbers collated:
01234567891011...
so hardly useable. I would perform a sprintf(store, "%d ", i); instead to add spacing between the numbers.
Also note that if you wanted to write characters in a file, you'd be better off with:
fprintf(fh,"%d ",i);
(but I suppose the main point is to learn to use fwrite)

Why wont the program read from the 2 argument file?

So the assignment is to implement a substring search program using an input file to be searched from and an input to be searched. I created the following code:
#include <stdio.h>
#include <string.h>
int main(int argc,char *argv[])
{
FILE *fp;
fp = fopen(argv[1],"r");
if (fp == NULL)
{
printf("Error");
return 0;
}
char* tmpp[100];
int count = 0;
char* nexts = argv[2];
char* tmp = fgets(tmpp,100,fp);
while(tmp = strstr(tmp,nexts))
{
count++;
tmp++;
}
printf("%d\n\n",count);
fclose(fp);
return 0;
}
The program compiles but when i go to implement it in the ubuntu terminal as:
echo "aabb" >beta
./a.out beta a
1
Why isnt the program using the first argument (argv[1]) as beta and the second argument (argv[2]) as a correctly?
You should open a file and then read bytes from that file into temporary buffer:
FILE *file = fopen("file", "r");
while (1) {
char buffer[BUFSIZ+1];
size_t nread = fread(buffer, 1, sizeof(buffer)-1, file);
if (nread == 0) break; // read error or EOF
buffer[nread] = 0;
// chunk with BUFSIZ amount of bytes is available via buffer (and is zero-terminated)
}
If you want to search for string/pattern in a file, be aware that looked pattern in file may cross your chunk-size boundary, for example: you look for "hello", and BUFSIZ is 512. File contains "hello" at byte 510. Obviously, if you read by 512, you will get the first chunk ending with "he", and the second chunk starting with "llo". Probability of this situation is nonzero for all chunk sizes (except SIZE_MAX, but that buffer size is impossible by other reasons). Dealing with borders may be very complicated.
Close...but this is closer:
#include <stdio.h>
#include <string.h>
int main(int argc, char *argv[])
{
if (argc != 3)
{
fprintf(stderr, "Usage: %s file pattern\n", argv[0]);
return 1;
}
FILE *fp = fopen(argv[1], "r");
if (fp == NULL)
{
fprintf(stderr, "Error: failed to open file %s for reading\n", argv[1]);
return 1;
}
char tmpp[1000];
int count = 0;
char* nexts = argv[2];
while (fgets(tmpp, sizeof(tmpp), fp) != 0)
{
char *tmp = tmpp;
while ((tmp = strstr(tmp, nexts)) != 0)
{
count++;
tmp++;
}
}
printf("%d\n", count);
fclose(fp);
return 0;
}
The main difference is that this loops reading multiple lines from the input file. Yours would only work on files with a single line of input.

How do I get the input to cut off or wrap around at a certain point?

Alrighty, so after a day and a bit of being on stackoverflow, I learned it's useful being on this site :) I ended up getting my program to work. I can get an unlimited amount of text files in on the command line and display them as well! So it looks like this:
CMD Console
c:\Users\Username\Desktop> wrapfile.exe hello.txt how.txt. are.txt you.txt random.txt
Hello How are you doing today? I hope you're doing quite well. This is just a test to see how much I can fit on the screen.
Now, I wana build on this program. How would I get this new found text to wrap around? Like, if you wanted to make it that, every 40 characters or so, the text jumps to the next line... how could we go about doing something like that?
Thanks again!
Here's the code I'm working with:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char **argv)
{
int l = 1;
while(l != argc)
{
FILE *fp; // declaring variable
fp = fopen(argv[l], "rb");
l++;
if (fp != NULL) // checks the return value from fopen
{
int i = 1;
do
{
i = fgetc(fp); // scans the file
printf("%c",i);
printf(" ");
}
while(i!=-1);
fclose(fp);
}
else
{
printf("Error.\n");
}
}
}
Okay, here we go...this looks a little different to yours, but this is ISO/ANSI C 1989 standard.
int main(int argc, char **argv)
{
FILE *fd = NULL;
char linebuf[40];
int arg = 1;
while (arg < argc) {
fd = fopen(argv[arg], "r");
if (NULL != fd) {
/* fgets(char *buf, size_t buflen, FILE *fd): returns NULL on error. */
while (NULL != fgets(linebuf, sizeof(linebuf), fd)) {
printf("%s\n", linebuf);
}
fclose(fd);
} else {
fprintf(stderr, "Cannot open \"%s\"\n", argv[arg]);
}
++arg;
}
}

C - why do I get this segfault with my File I/O program?

I'm making a practice program to make a simple alteration to a variable in my Makefile while learning C. I get a segfault whenever I run this program, but I don't know why. I suspect it has something to do with the "r+" fopen mode or my use of fseek(). Here is the code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void rewind(FILE *f)
{
long start = 0;
fseek(f, start, SEEK_SET);
}
int main(int argc, char *argv[])
{
if(argc != 2)
{
printf("arguments too many or too few. use: setfile <filename> (minus .c extension)\n");
exit(1);
}
FILE *mfile = fopen("Makefile", "r+"); // note to self: r+ is for a file that already exists
FILE *old_mfile = fopen("OLD.Makefile", "r+"); // w+ erases the file and starts in read-write mode with a fresh one
char line[200];
char *fn_ptr;
char *name = argv[1];
while(fgets(line, 199, mfile)) // first create the backup
{
fputs(line , old_mfile); // before changing the line, write it to the backup
}
rewind(mfile); // reset the files to position 0
rewind(old_mfile);
puts("Makefile backed-up as 'OLD.Makefile'");
while(fgets(line, 199, old_mfile)) // now lets loop again and rewrite with the new FNAME
{
if((fn_ptr = (strstr(line, "FNAME= "))))
{
fn_ptr += strlen("FNAME= ");
int i;
for(i = 0; i < strlen(name); i++)
{
*(fn_ptr+i) = *(name+i);
}
*(fn_ptr+i) = '\0';
}
// printf("%s", line); // for debugging
fputs(line , mfile);
}
printf("FNAME is now: '%s'\n", argv[1]);
fclose(mfile);
fclose(old_mfile);
return 0;
}
Check this line again:
FILE *old_mfile = fopen("OLD.Makefile", "r+"); // w+ erases the file and starts in read-write mode with a fresh one
You have the correct mode in the comment, but not in the fopen call.
How to not get the segmentation fault, besides changing the mode? Always check return values! If fopen fails it will return NULL.
Here is a working version. There are several subtle points to note here so I will leave you to examine them one by one by toggling the changes in and out. The man pages for the called functions are probably enough if carefully read.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void rewind(FILE *f)
{
long start = 0;
fseek(f, start, SEEK_SET);
}
int main(int argc, char *argv[])
{
if(argc != 2)
{
printf("arguments too many or too few. use: setfile <filename> (minus .c extension)\n");
exit(1);
}
FILE *mfile = fopen("Makefile", "r+"); // note to self: r+ is for a file that already exists
FILE *old_mfile = fopen("OLD.Makefile", "w+"); // w+ erases the file and starts in read-write mode with a fresh one
char line[200];
char *fn_ptr;
char *name = argv[1];
while(fgets(line, 199, mfile)) // first create the backup
{
fputs(line , old_mfile); // before changing the line, write it to the backup
memset(line,0x00,200);
}
rewind(mfile); // reset the files to position 0
rewind(old_mfile);
memset(line,0x00,200);
puts("Makefile backed-up as 'OLD.Makefile'");
fclose(mfile);
mfile = fopen("Makefile", "w");
while(fgets(line, 199, old_mfile)) // now lets loop again and rewrite with the new FNAME
{
if((fn_ptr = strstr(line, "FNAME=")) != NULL)
{
fn_ptr += strlen("FNAME=");
int i;
for(i = 0; i < strlen(name); i++)
{
*(fn_ptr+i) = *(name+i);
}
*(fn_ptr+i) = '\0';
}
// printf("%s", line); // for debugging
fputs(line , mfile);
fputs("\n" , mfile);
memset(line,0x00,200);
}
printf("FNAME is now: '%s'\n", argv[1]);
fclose(mfile);
fclose(old_mfile);
return 0;
}

C regex performance

I'm writing a code in C to perform some regex in enwik8 and enwik9. I'm also creating the same algorithm in other languages for benchmark purposes. The issue is that I'm doing something wrong with my C code because it takes 40 seconds while python and others take just 10 seconds.
What am I forgetting?
#include <stdio.h>
#include <regex.h>
#define size 1024
int main(int argc, char **argv){
FILE *fp;
char line[size];
regex_t re;
int x;
const char *filename = "enwik8";
const char *strings[] = {"\bhome\b", "\bdear\b", "\bhouse\b", "\bdog\b", "\bcat\b", "\bblue\b", "\bred\b", "\bgreen\b", "\bbox\b", "\bwoman\b", "\bman\b", "\bwomen\b", "\bfull\b", "\bempty\b", "\bleft\b", "\bright\b", "\btop\b", "\bhelp\b", "\bneed\b", "\bwrite\b", "\bread\b", "\btalk\b", "\bgo\b", "\bstay\b", "\bupper\b", "\blower\b", "\bI\b", "\byou\b", "\bhe\b", "\bshe\b", "\bwe\b", "\bthey\b"};
for(x = 0; x < 33; x++){
if(regcomp(&re, strings[x], REG_EXTENDED) != 0){
printf("Failed to compile regex '%s'\n", strings[x]);
return -1;
}
fp = fopen(filename, "r");
if(fp == 0){
printf("Failed to open file %s\n", filename);
return -1;
}
while((fgets(line, size, fp)) != NULL){
regexec(&re, line, 0, NULL, 0);
}
}
return 0;
}
file access and compiling regexes is probably a culprit.
compile your regexs once and store them in an array
open the file
read a line
run each compiled regex over it
close the file.

Resources