wzip.c OS three easy steps - c

I am newbie in OS. Im currently learning OS three easy steps.
I found this code for the the first project of the course.
(wzip) is a file compression tool, and the other (wunzip) is a file decompression tool.
input:
aaaaaaaaaabbbb
correct output:
10a4b
instructions: write out a 4-byte integer in binary format followed by the single character in ASCII.
current output:
ab
I type in the shell:
prompt> gcc -o wzip wzip.c -Wall -Werror
prompt> ./wzip file1.txt > file1.z
This is the link for the project:
https://github.com/remzi-arpacidusseau/ostep-projects/tree/master/initial-utilities
This is the code I found for this specific part of the project:
wzip:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <arpa/inet.h>
/*
wzip: is a file compresion tool.
*/
void writeFile(int , char *);
int main(int argc, char *argv[]){
FILE *fp;
char newbuff[2], oldbuff[2];
int count;
if (argc < 2){
printf("wzip: file1 [file2 ...]\n");
exit(EXIT_FAILURE);
}
// open files
for (size_t i = 1; i < argc; i++){
if ((fp = fopen(argv[i], "r")) == NULL){
printf("wzip: cannot open file\n");
exit(EXIT_FAILURE);
}
while (fread(newbuff, 1, 1, fp)){
if (strcmp(newbuff, oldbuff) == 0){
count++;
} else {
if (oldbuff[0] != '\0'){
writeFile(count, oldbuff);
}
count = 1;
strcpy(oldbuff, newbuff);
}
}
fclose(fp);
}
writeFile(count, oldbuff);
return 0;
}
void writeFile(int count, char *oldbuff){
// write as network byte order
count = htonl(count);
fwrite(&count, 4, 1, stdout);
fwrite(oldbuff, 1, 1, stdout);
}
wunzip:
#include <stdio.h>
#include <stdlib.h> // exit
#include <string.h> // memset
#include <arpa/inet.h> // ntohl
int
main(int argc, char *argv[])
{
FILE *fp;
char buff[5];
if (argc <= 1) {
printf("wunzip: file1 [file2 ...]\n");
exit(EXIT_FAILURE);
}
for (size_t i = 1; i < argc; i++) {
if ((fp = fopen(argv[i], "r")) == NULL) {
printf("wunzip: cannot open file\n");
exit(EXIT_FAILURE);
}
int count = 0;
while (fread(&count, 4, 1, fp)) {
count = ntohl(count); // read from network byte order
memset(buff, 0, strlen(buff));
fread(buff, 1, 1, fp);
for (size_t i = 0; i < count; i++) {
printf("%s", buff);
}
}
fclose(fp);
}
return 0;
}
Cant please some one can give a hand to understand better.
Thanks in advance.

How is your solution coming along?
The first that jumps out about the code posted here is that your write the wrong number of bytes. In other words, you don't follow the compression guidelines of the exercise. The instructions for the OSTEP project was to write a 32 bit int (4 bytes) followed by a single ascii character (1 byte).
It helps to set up a test before writing the compressed output. Make sure that what is being written does indeed equal 5 bytes.
#include <stdio.h>
struct token {
u_int32_t count;
u_int8_t ch;
};
size_t numbytes (struct token t) {
/* calculates the number of bytes in a single token */
/* assumes a token has two attributes */
/* should return a value of 5 */
size_t mybytes = sizeof(t.count) + sizeof(t.ch)
return mybytes;
}
I personally found it easier to think about the read procedure in terms of a parser. This is due to the fact that some of the test cases get slightly more complex. You need to effectively manage state between line reads and also between files.

It's a little bit late, but I just did this program for my course's assignment and I want to share my solution. It passes all the available test cases but it's not official from any party (neither from my course nor the project owner).
The code:
#include <stdio.h> // FILE, stdout, fprintf, fwrite, fgetc, EOF, fclose
#include <stdlib.h> // EXIT_*, exit
struct rle_t
{
int l;
char c;
};
void
writerle(struct rle_t rleobj)
{
fwrite((int *)(&(rleobj.l)), sizeof(int), 1, stdout);
fwrite((char *)(&(rleobj.c)), sizeof(char), 1, stdout);
}
struct rle_t
process(FILE *stream, struct rle_t prev)
{
int curr;
struct rle_t rle;
while ((curr = fgetc(stream)) != EOF)
{
if (prev.c != '\0' && curr != prev.c)
{
rle.c = prev.c;
rle.l = prev.l;
prev.l = 0;
writerle(rle);
}
prev.l++;
prev.c = curr;
}
rle.c = prev.c;
rle.l = prev.l;
return rle;
}
int
main(int argc, const char *argv[])
{
if (argc < 2)
{
fprintf(stdout, "wzip: file1 [file2 ...]\n");
exit(EXIT_FAILURE);
}
struct rle_t prev;
prev.c = '\0';
prev.l = 0;
for (int i = 1; i < argc; ++i)
{
FILE *fp = fopen(argv[i], "r");
if (fp == NULL)
{
fprintf(stdout, "wzip: cannot open files\n");
exit(EXIT_FAILURE);
}
struct rle_t rle = process(fp, prev);
prev.c = rle.c;
prev.l = rle.l;
fclose(fp);
}
writerle(prev);
return 0;
}
From the post, I think there is no need to use any function like htonl(3) for this problem. The tricky part is in the writerle() function, which you already got it. But let me explain what I understand:
Use fwrite(3) to write binary format to the stream, in this case, stdout.
First parameter is a pointer, that's why I need &(rleobj.l) and &(rleobj.c).
This line
fwrite((int *)(&(rleobj.l)), sizeof(int), 1, stdout);
tells that I want to write 4 bytes (sizeof(int)) of the first parameter (&(rleobj.l)) to the standard output (stdout) one time (1). The typecast is optional (depending on your compiler and how you want to read your code).
The reason why they require you to do so is because it will separate between the run-length part and the character part.
Let's say you have a simple input file like this:
333333333333333333333333333333333aaaaaaaaaaaa
After encoding without the binary format:
33312a
This is wrong. Because now, it looks like the run-length of the character a is 33312, instead of 33 of 3 and 12 of a.
However, with binary format, those parts are separated:
❯ xxd -b output.z
00000000: 00100001 00000000 00000000 00000000 00110011 00001100 !...3.
00000006: 00000000 00000000 00000000 01100001 ...a
Here, the first four bytes represent the run-length and the next one byte represents the character.
I hope this will help.

Related

How to use fscanf to read a text file including many words and store them into a string array by index

The wordlist.txt is including like:
able
army
bird
boring
sing
song
And I want to use fscanf() to read this txt file line by line and store them into a string array by indexed every word like this:
src = [able army bird boring sing song]
where src[0]= "able", src[1] = "army" and so on. But my code only outputs src[0] = "a", src[1] = "b"... Could someone help me figure out what's going wrong in my code:
#include <stdio.h>
#include <string.h>
int main(int argc, char *argv[])
{
FILE *fp = fopen("wordlist.txt", "r");
if (fp == NULL)
{
printf("%s", "File open error");
return 0;
}
char src[1000];
for (int i = 0; i < sizeof(src); i++)
{
fscanf(fp, "%[^EOF]", &src[i]);
}
fclose(fp);
printf("%c", src[0]);
getchar();
return 0;
}
Pretty appreciated!
For example like this.
#include <stdio.h>
#include <string.h>
#include <errno.h>
#define MAX_ARRAY_SIZE 1000
#define MAX_STRING_SIZE 100
int main(int argc, char *argv[]) {
FILE *fp = fopen("wordlist.txt", "r");
if (fp == NULL) {
printf("File open error\n");
return 1;
}
char arr[MAX_ARRAY_SIZE][MAX_STRING_SIZE];
int index = 0;
while (1) {
int ret = fscanf(fp, "%s", arr[index]);
if (ret == EOF) break;
++index;
if (index == MAX_ARRAY_SIZE) break;
}
fclose(fp);
for (int i = 0; i < index; ++i) {
printf("%s\n", arr[i]);
}
getchar();
return 0;
}
Some notes:
If there is an error, it is better to return 1 and not 0, for 0 means successful execution.
For a char array, you use a pointer. For a string array, you use a double pointer. A bit tricky to get used to them, but they are handy.
Also, a check of the return value of the fscanf would be great.
For fixed size arrays, it is useful to define the sizes using #define so that it is easier to change later if you use it multiple times in the code.
It's reading file one character at a time, Which itself is 4 in size like we see sizeof('a') in word able. Same goes for 'b' and so on. So one approach you can use is to keep checking when there is a space or newline character so that we can save the data before these two things as a word and then combine these small arrays by adding spaces in between and concatenating them to get a single array.

Read 8 Bit Binary Numbers from text File in C

I'm trying to read from a text file in C that contains a list of 8 bit binary numbers to be used in another function.
The text file is formatted like:
01101101
10110110
10101101
01001111
11010010
00010111
00101011
Ect. . .
Heres kinda what i was trying to do
Pseudo code
void bincalc(char 8_bit_num){
//does stuff
}
int main()
{
FILE* f = fopen("test.txt", "r");
int n = 0, i = 0;
while( fscanf(f, "%d ", &n) > 0 ) // parse %d followed by a new line or space
{
bincalc(n);
}
fclose(f);
}
I think i'm on the right track, however any help is appreciated.
This is not the standard track to do it, but it ok. You are scanning the file reading ints so it will read the string and interpret them as decimal number, which you should in turn convert to the corresponding binary, i.e. converting 1010 decimal to 2^3+2^1=9 decimal. This is of course possible, you just need to transform powers of ten to powers of 2 (1.10^3+0.10^2+1.10^1+0.10^0 to 1.2^3+0.2^2+1.2^1+0.2^0). Be careful that this work with 8-bits numbers, but not with too huge ones (16-bits will not).
The more common way is to read the strings and decode the strings directly, or read char by char and make an incremental conversion.
If you want to have a number at the end, I would suggest something like the following:
int i;
char buf[10], val;
FILE* f = fopen("test.txt", "r");
while( ! feof(f) ) {
val = 0;
fscanf(f, "%s", buf);
for( i=0; i<8; i++ )
val = (val << 1) | (buf[i]-48);
/* val is a 8 bit number now representing the binary digits */
bincalc(val);
}
fclose(f);
This is just a short snippet to illustrate the idea and does not catch all corner cases regarding the file or buffer handling. I hope it will help anyway.
I think this is a possible solution, i made it simple so you can understand and adapt to your needs. You only need to repeat this for each line.
#include <stdio.h>
#include <stdlib.h>
int main(void)
{
FILE *file =fopen("data","r");
char *result=calloc(1,sizeof(char));
char line[8];
int i=0;
for(;i<8;i++)
{
char get = (char)getc(file);
if(get == '0')
*result <<= 1;
else if(get == '1')
*result = ((*result<<1)|0x1) ;
}
printf("->%c\n",*result);
return 0;
}
I don't know of a way to specify binary format for fscanf() but you can convert a binary string like this.
#include <stdio.h>
#include <stdlib.h>
void fatal(char *msg) {
printf("%s\n", msg);
exit (1);
}
unsigned binstr2int(char *str) {
int i = 0;
while (*str != '\0' && *str != '\n') { // string term or newline?
if (*str != '0' && *str != '1') // binary digit?
fatal ("Not a binary digit\n");
i = i * 2 + (*str++ & 1);
}
return i;
}
int main(void) {
unsigned x;
char inp[100];
FILE *fp;
if ((fp = fopen("test.txt", "r")) == NULL)
fatal("Unable to open file");
while (fgets(inp, 99, fp) != NULL) {
x = binstr2int(inp);
printf ("%X\n", x);
}
fclose (fp);
return 0;
}
File input
01101101
10110110
10101101
01001111
11010010
00010111
00101011
Program output
6D
B6
AD
4F
D2
17
2B
Read the file line by line, then use strtol() with base 2.
Or, if the file is guaranteed to be not long, you could also read the full contents in (dynamically allocated) memory and use the endptr parameter.

Trying to make program that counts number of bytes in a specified file (in C)

I am currently attempting to write a program that will tell it's user how many times the specified 8-bit byte appears in the specified file.
I have some ground work laid out, but when it comes to making sure that the file makes it in to an array or buffer or whatever format I should put the file data into to check for the bytes, I feel I'm probably very far off from using the correct methods.
After that, I need to check whatever the file data gets put in to for the byte specified, but I am also unsure how to do this.
I think I may be over-complicating this quite a bit, so explaining anything that needs to be changed or that can just be scrapped completely is greatly appreciated.
Hopefully didn't leave out any important details.
Everything seems to be running (this code compiles), but when I try to printf the final statement at the bottom, it does not spit out the statement.
I have a feeling I just did not set up the final for loop correctly at all..
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
//#define BUFFER_SIZE (4096)
main(int argc, char *argv[]){ //argc = arg count, argv = array of arguements
char buffer[4096];
int readBuffer;
int b;
int byteCount = 0;
b = atoi(argv[2]);
FILE *f = fopen(argv[1], "rb");
unsigned long count = 0;
int ch;
if(argc!=3){ /* required number of args = 3 */
fprintf(stderr,"Too few/many arguements given.\n");
fprintf(stderr, "Proper usage: ./bcount path byte\n");
exit(0);
}
else{ /*open and read file*/
if(f == 0){
fprintf(stderr, "File could not be opened.\n");
exit(0);
}
}
if((b <= -1) || (b >= 256)){ /*checks to see if the byte provided is between 0 & 255*/
fprintf(stderr, "Byte provided must be between 0 and 255.\n");
exit(0);
}
else{
printf("Byte provided fits in range.\n");
}
int i = 0;
int k;
int newFile[i];
fseek(f, 0, SEEK_END);
int lengthOfFile = ftell(f);
for(k = 0; k < sizeof(buffer); k++){
while(fgets(buffer, lengthOfFile, f) != NULL){
newFile[i] = buffer[k];
i++;
}
}
if(newFile[i] = buffer[k]){
printf("same size\n");
}
for(i = 0; i < sizeof(newFile); i++){
if(b == newFile[i]){
byteCount++;
}
printf("Final for loop is working???"\n");
}
}
OP is mixing fgets() with binary reads of a file.
fgets() reads a file up to the buffer size provided or reaching a \n byte. It is intended for text processing. The typical way to determine how much data was read via fgets() is to look for a final \n - which may or may not be there. The data read could have embedded NUL bytes in it so it becomes problematic to know when to stop scanning the buffer. on a NUL byte or a \n.
Fortunately this can all be dispensed with, including the file seek and buffers.
// "rb" should be used when looking at a file in binary. C11 7.21.5.3 3
FILE *f = fopen(argv[1], "rb");
b = atoi(argv[2]);
unsigned long byteCount = 0;
int ch;
while ((ch = fgetc(f)) != EOF) {
if (ch == b) {
byteCount++;
}
}
The OP error checking is good. But the for(k = 0; k < sizeof(buffer); k++){ loop and its contents had various issues. OP had if(b = newFile[i]){ which should have been if(b == newFile[i]){
Not really an ANSWER --
Chux corrected the code, this is just more than fits in a comment.
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char **argv)
{
struct stat st;
int rc=0;
if(argv[1])
{
rc=stat(argv[1], &st);
if(rc==0)
printf("bytes in file %s: %ld\n", argv[1], st.st_size);
else
{
perror("Cannot stat file");
exit(EXIT_FAILURE);
}
return EXIT_SUCCESS;
}
return EXIT_FAILURE;
}
The stat() call is handy for getting file size and for determining file existence at the same time.
Applications use stat instead of reading the whole file, which is great for gigantic files.

How can i select the last line of a text file using C

I am trying to find out a way to select the last line of a text file using C (not c++ or c#, just C) and I am having a difficult time finding a way to do this, if anyone could assist me with this problem I would be very grateful, thanks! (btw for a good example of what i am trying to do, this would be similar what to tail -n 1 would be doing in bash)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void main(int argc, char *argv[])
{
FILE *fd; // File pointer
char filename[] = "./Makefile"; // file to read
char buff[1024];
if ((fd = fopen(filename, "r")) != NULL) // open file
{
fseek(fd, 0, SEEK_SET); // make sure start from 0
while(!feof(fd))
{
memset(buff, 0x00, 1024); // clean buffer
fscanf(fd, "%[^\n]\n", buff); // read file *prefer using fscanf
}
printf("Last Line :: %s\n", buff);
}
}
I'm using Linux.
CMIIW
No direct way, but my preferred method is:
Go to the end of the file
Read last X bytes
If they contain '\n' - you got your line - read from that offset to the end of the file
Read X bytes before them
back to 3 until match found
If reached the beginning of the file - the whole file is the last line
E.g.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef max
#define max(a, b) ((a)>(b))? (a) : (b)
#endif
long GetFileSize(FILE *fp){
long fsize = 0;
fseek(fp,0,SEEK_END);
fsize = ftell(fp);
fseek(fp,0,SEEK_SET);//reset stream position!!
return fsize;
}
char *lastline(char *filepath){
FILE *fp;
char buff[4096+1];
int size,i;
long fsize;
if(NULL==(fp=fopen(filepath, "r"))){
perror("file cannot open at lastline");
return NULL;
}
fsize= -1L*GetFileSize(fp);
if(size=fseek(fp, max(fsize, -4096L), SEEK_END)){
perror("cannot seek");
exit(1);
}
size=fread(buff, sizeof(char), 4096, fp);
fclose(fp);
buff[size] = '\0';
i=size-1;
if(buff[i]=='\n'){
buff[i] = '\0';
}
while(i >=0 && buff[i] != '\n')
--i;
++i;
return strdup(&buff[i]);
}
int main(void){
char *last;
last = lastline("data.txt");
printf("\"%s\"\n", last);
free(last);
return 0;
}
If you are using *nix operating system, you can use the command 'last'. See 'last' man page for details.
If you want integrate the functionality inside another program, you can use 'system' call to execute 'last' and get it's result.
A simple and inefficient way to do it is to read each line into a buffer.
When the last read gives you EOF, you have the last line in the buffer.
Binyamin Sharet's suggestion is more efficient, but just a bit harder to implement.

Binary Translation

#include <stdio.h>
int main() {
unsigned char data[1];
FILE *f = fopen("bill.jpg", "rb");
while (!feof(f)) {
if (fread(data, 1, 1, f) > 0) {
printf("0x%02x\n", data[0]);
}
}
fclose(f);
}
Is this the right? I am worried because if I view the file using hexdump, I get completely different output.
That should correctly print the first byte of the file in hex.
Check the documentation for the Hexdump utility used, or tell us which platform is being used. Some dump utilities display the bytes in reverse order on each line to make little-endian reading somewhat more intuitive—once you understand what it is doing.
I'm sorry, but no -- while (!feof(f)) is essentially always wrong -- it'll typically appear to read the last item in the file twice. Here's a reasonably usable hex dumper I wrote a few years ago:
/* public domain by Jerry Coffin, tested with MS C 10.0 and BC 4.5
*/
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char **argv) {
unsigned long offset = 0;
FILE *input;
int bytes, i, j;
unsigned char buffer[16];
char outbuffer[60];
if ( argc < 2 ) {
fprintf(stderr, "\nUsage: dump filename [filename...]");
return EXIT_FAILURE;
}
for (j=1;j<argc; ++j) {
if ( NULL ==(input=fopen(argv[j], "rb")))
continue;
printf("\n%s:\n", argv[j]);
while (0 < (bytes=fread(buffer, 1, 16, input))) {
sprintf(outbuffer, "%8.8lx: ", offset+=16);
for (i=0;i<bytes;i++) {
sprintf(outbuffer+10+3*i, "%2.2X ",buffer[i]);
if (!isprint(buffer[i]))
buffer[i] = '.';
}
printf("%-60s %*.*s\n", outbuffer, bytes, bytes, buffer);
}
fclose(input);
}
return 0;
}

Resources