LZW Sequence implementation - c

I am absolutely lost on what I am supposed to do in this C programming assignment I have. I am pretty sure I created the newSequence function wrong, because there isn't much clarity, for me, on what all the variables mean. Can someone guide me on how I need to create a newSequence?
#include <stdlib.h>
#include "sequencecopy.h"
Sequence* newSequence(unsigned char firstByte, unsigned long long hashSize) {
Sequence *s = malloc(sizeof(Sequence));
s->length = 1; // each letter is one byte
s->code = (unsigned int) firstByte; // casting char input into 4 byte value
s->hash = (unsigned long long) s->code; // casting code into 8 byte value
s->hash = ((s->hash << 5) + s->hash) + s->code; // hash function
s->bucket = s->hash % hashSize; // hash code modded to hash table size
// s->bytes[0] = firstByte;
return s;
}
/* Release a Sequence by handing its memory back to the allocator. */
void deleteSequence(Sequence* sequence) {
    free(sequence);
}
/* Placeholder: not implemented yet, always returns NULL.
   NOTE(review): the header shown for this file declares
   copySequenceAppending(Sequence*, unsigned char, unsigned long long);
   this definition differs in both its name and the type of hashSize --
   confirm which one is intended before implementing. */
Sequence* copySequenceAppend(Sequence* sequence, unsigned char addByte, unsigned int hashSize) {
return NULL;
}
/* Placeholder: not implemented yet. Neither writeFunc nor context is used,
   and the function always reports 0 bytes written. */
unsigned int outputSequence(Sequence* sequence,
void (*writeFunc)(unsigned char c, void* context), void* context) {
return 0;
}
/* Placeholder: not implemented yet. The arguments are never inspected, so
   every pair of sequences is reported as identical. */
bool identicalSequences(Sequence* a, Sequence* b) {
return true;
}
This is the header file sequence.h
#ifndef SEQUENCE_H
#define SEQUENCE_H
#include <stdbool.h>
typedef struct _sequence {
unsigned int length; // number of bytes in the sequence
unsigned int usage; // number of times the sequence is used
unsigned int code; // compression code assigned in the dictionary
unsigned int wordCount; // number of words holding the bytes
unsigned long long hash; // raw hash code for the sequence
unsigned long long bucket; // hash code modded to the hash table size
struct _sequence* next; // next sequence in a list of sequences (collisions)
union { // union data permits both character and word accessability
unsigned char bytes[1 << SEQUENCE_ALLOC]; // buffer for characters, multiple of 8
unsigned long long words[1]; // for accessing data as 8 byte words
} data;
} Sequence;
/* create a new, one character Sequence using char firstByte as the character.
hashSize is the size of the hashTable and if non-zero, a hash
is computed for the new Sequence. */
Sequence* newSequence(unsigned char firstByte, unsigned long long hashSize);
/* either frees sequence or places it on a list of idle sequences for possible reuse */
void deleteSequence(Sequence* sequence);
/* creates a new Sequence containing the old sequence bytes with newByte appended to the end,
hashSize has the same meaning and function as in newSequence() above. */
Sequence* copySequenceAppending(Sequence* sequence, unsigned char newByte, unsigned long long hashSize);
/* write the data bytes of sequence through writeFunc (context is passed along to it), return the number of bytes written. */
unsigned int outputSequence(Sequence* sequence,
void (*writeFunc)(unsigned char c, void* context),
void* context);
/* returns true if a and b contain identical sequences of data bytes, false otherwise */
bool identicalSequences(Sequence* a, Sequence* b);
#endif

Related

use memcpy to fill a struct taking data from an array in C

The following task is from an exam I took in engineering school (mechanical engineering):
You get an array unsigned char buffer[128]; used to read data from a source byte by byte, containing data of the structure:
struct Pixel {
unsigned char x;
unsigned char y;
unsigned char greyValue;
};
The task is: Create an instance of a pixel and copy the data content from the header of the buffer using "memcpy".
My approach does not seem to work:
#include <stdio.h>
#include <string.h>
/* One pixel record as laid out in the byte buffer: x, y, then grey value. */
struct Pixel {
    unsigned char x;
    unsigned char y;
    unsigned char greyValue;
};

/*
 * Copies the first pixel record from the head of the buffer into a
 * struct Pixel and prints its three fields.
 */
int main()
{
    unsigned char buffer[128] = {2,4,44};
    struct Pixel singlePixel;

    /* memcpy needs the ADDRESS of the destination object, hence &singlePixel;
       passing the struct itself is what triggered "incompatible type for
       argument 1". sizeof singlePixel keeps the byte count tied to the
       struct and is far smaller than the 128-byte buffer. */
    memcpy(&singlePixel, buffer, sizeof singlePixel);

    printf("singlePixel.x = %d\n", singlePixel.x);
    printf("singlePixel.y = %d\n", singlePixel.y);
    printf("singlePixel.greyValue = %d\n", singlePixel.greyValue);
    return 0;
}
I would expect that singlePixel.x = 2, singlePixel.y = 4 and singlePixel.greyValue = 44.
When debugging I get the error: incompatible type for argument 1 of ‘memcpy’
I'm also not at all sure if my approach is up to the task, since I don't understand exactly how this should work with the buffer...

Clear just bit field members of a struct?

I have a struct like the following:
struct Foo {
unsigned int id;
unsigned int flag_1 : 1;
unsigned int flag_2 : 1;
unsigned int flag_3 : 1;
// Some arbitrary number of further flags. Code is
// automatically generated and number will vary.
// Notably, it may be more than an int's worth.
int some_data;
float some_more_data;
// ...
};
From time to time, I need to reset all the flags to zero while preserving the rest of the struct. One way is obviously to set each flag to 0 individually, but it feels like there ought to be a way to do it in one fell swoop. Is that possible?
(Note that I am open to not using bit fields, but this is code that will sometimes run on memory-contrained systems, so the memory savings are very appealing.)
Edit:
There is a similar question here: Reset all bits in a c bitfield
However, the struct in that question is entirely bitfields. I cannot simply memset the entire struct to zero here, and the other answer involving unions is not guaranteed to work, especially if there are more than an int's worth of flags.
Just use a separate struct for the flags:
struct Foo_flags {
unsigned int flag_1 : 1;
unsigned int flag_2 : 1;
unsigned int flag_3 : 1;
// ...
};
struct Foo {
unsigned int id;
struct Foo_flags flags;
int some_data;
float some_more_data;
// ...
};
Or even a simpler nested struct:
struct Foo {
unsigned int id;
struct {
unsigned int flag_1 : 1;
unsigned int flag_2 : 1;
unsigned int flag_3 : 1;
// ...
} flags;
int some_data;
float some_more_data;
// ...
};
Then, later in your code:
struct Foo x;
// ...
x.flags.flag_1 = 1;
// ...
memset(&x.flags, 0, sizeof(x.flags));
With some minor adjustments, you can use the offsetof macro to find the beginning and end of the "flag" data within the structure, then use memset to clear the relevant memory. (Note that you cannot use offsetof directly on bitfields, hence the addition of the flag_beg member!)
Here's a working example:
#include <stdio.h>
#include <stddef.h> // defines offsetof
#include <string.h> // declares memset
struct Foo {
unsigned int id;
unsigned int flag_beg; // Could be unsigned char to save space
unsigned int flag_1 : 1;
unsigned int flag_2 : 1;
unsigned int flag_3 : 1;
unsigned int flag_end; // Could be unsigned char to save space
// Some arbitrary number of further flags. Code is
// automatically generated and number will vary.
// Notably, it may be more than an int's worth.
int some_data;
float some_more_data;
// ...
};
#define FBEG (offsetof(struct Foo, flag_beg))
#define FEND (offsetof(struct Foo, flag_end))
int main()
{
struct Foo f;
f.id = 3; f.flag_1 = 1; f.flag_2 = 0; f.flag_3 = 1;
f.some_data = 33; f.some_more_data = 16.2f;
printf("%u %u %u %u %d %f\n", f.id, f.flag_1, f.flag_2, f.flag_3, f.some_data, f.some_more_data);
memset((char*)(&f) + FBEG, 0, FEND - FBEG);
printf("%u %u %u %u %d %f\n", f.id, f.flag_1, f.flag_2, f.flag_3, f.some_data, f.some_more_data);
return 0;
}

Converting unsigned char pointer of data to struct containing ints

I am trying to cast this pointer of data to my struct and the actual value populate in the struct.
unsigned char *data = "00000001000000020000000300000004AE93KD93KD91Q830DMNE03KEkdaredgreenblueorangeyellow";
typedef struct mystruc {
int a;
int b;
int c;
int d;
} mystruc;
mystruct ms = (mystruct *)data;
printf("%i", ms->a);
Output:
808464432
I am trying to find out how to fill in a, b, c, d with the actual values 1, 2, 3, 4
I would like the output to be:
1
I will also need to later access the rest of the data.
Use sscanf() to parse the numbers in the string.
mystruct ms;
sscanf(data, "%8d%8d%8d%8d", &ms.a, &ms.b, &ms.c, &ms.d);
%8d means to parse an 8-character decimal field as an int. If it's actually hexadecimal, change it to %8x.
Your code is interpreting the character codes in the string as the binary representation of the structure members, it doesn't parse it.
Your unsigned char is one byte wide, so "00000001" will be 3030303030303031 in hex (eight bytes), because the ASCII code for '0' is 0x30 in hex, and the ASCII for '1' is 0x31.
Your int is 4 bytes wide, so it'll capture the first 4 bytes of data, which will be 30303030 in hex, or 808464432 in decimal.
This, however, will work on a little-endian machine:
#include <stdio.h>
#include <string.h>

typedef struct mystruct {
    int a;
    int b;
    int c;
    int d;
} mystruct;

/* Raw byte values (not ASCII digits) for the struct. The first field holds 1
   with its least significant byte first, i.e. little-endian order; the
   remaining bytes are zero so the whole struct is backed by real data. */
const unsigned char data[sizeof(mystruct)] = { 1, 0, 0, 0 };

/* Rebuilds a mystruct from the raw bytes and prints its first field. */
int main(void) {
    mystruct ms;

    /* Copy the bytes rather than casting the byte pointer to mystruct *:
       the cast version may read through a misaligned pointer and past the
       end of the data, while memcpy has neither problem. */
    memcpy(&ms, data, sizeof ms);
    printf("%i", ms.a); // outputs: 1 on a little-endian machine with 4-byte int
    return 0;
}

Judy Array judysl(3) strange behavior

I'm testing the judy arrays implementation on ubuntu 11.10 "libjudy-dev".
I've run into some strange behavior, possibly a bug, related to the size of val and the key.
In the example, if I use the struct TEST with only 1 int, large keys work; but if I use the 10-int struct with the same key it doesn't. The 10-int struct works OK with small keys.
judy manpage
The man page says that the string can be any size.
#include <stdio.h>
#include <string.h>
#include <Judy.h>
/*struct TEST {
unsigned int size9;
};*/
struct TEST {
unsigned int size0;
unsigned int size1;
unsigned int size2;
unsigned int size3;
unsigned int size4;
unsigned int size5;
unsigned int size6;
unsigned int size7;
unsigned int size8;
unsigned int size9;
};
/* Inserts one string key into a JudySL array, writes a field through the
   value pointer that JSLI hands back, then looks the key up again with JSLG
   and prints that field (or "NULL" when the lookup yields no value). */
int main()
{
struct TEST *val;
char key[1024];
Pvoid_t array = NULL;
//strcpy(key, "0123456789_0123456789");
strcpy(key, "0123456789_0123456789_0123456789");
JSLI(val, array, key);
/* NOTE(review): size9 is the tenth unsigned int of struct TEST. If the slot
   JSLI returns is only one word wide, this store lands outside that slot --
   confirm against the JudySL documentation. */
val->size9 = 10;
val = NULL;
JSLG(val, array, key);
if(val == NULL) {
printf("NULL\n");
} else {
printf("%u\n", val->size9);
}
return 0;
}
JudySL "maps" a string to a word in RAM. This word is used as a "word_t" or a "pointer to more memory". The routines return (val in your case) a pointer to the word available for your use.
Your code makes that pointer (val) a pointer to a struct of greater size than one word -- thus
destroying part of the internal Judy data structure with the statement "val->size9 = 10;".
Keep in mind that the "key" is a string, and the PValue is a pointer to an object of size word. If you want *PValue to point to struct TEST and it is bigger than a word_t then memory must be allocated for it. Your test program seems to want to map a string to a struct TEST--
struct TEST is bigger than a word_t.
Doug Baskins

How to access members of a `struct' according to a variable integer in C?

Suppose I have this struct (which incidentally contain bit-fields, but you shouldn't care):
struct Element {
unsigned int a1 : 1;
unsigned int a2 : 1;
...
unsigned int an : 1;
};
and I want to access the i'th member in a convenient way. Let's examine a retrieval solution.
I came up with this function:
int getval(struct Element *ep, int n)
{
int val;
switch(n) {
case 1: val = ep->a1; break;
case 2: val = ep->a2; break;
...
case n: val = ep->an; break;
}
return val;
}
But I suspect that there is a much simpler solution. Something like array accessing style, maybe.
I tried to do something like that:
#define getval(s,n) s.a##n
But expectedly it doesn't work.
Is there a nicer solution?
Unless you have specific knowledge of the underlying structure of the struct, there is no way to implement such a method in C. There are all sorts of problems that will get in the way including
Members of different sizes
Packing issues
Alignment issues
Tricks like bitfields will be problematic
You're best off implementing a method by hand for your struct which has a deep understanding of the internal members of the structure.
If every field in your struct is an int, then you should basically be able to say
/* Reinterprets the struct as a flat array of int and returns element n
   (zero-based). n is not range-checked.
   NOTE(review): only meaningful when every member is a plain int stored
   back to back; bit-field members do not each occupy a whole int. */
int getval(struct Element *ep, int n)
{
return *(((int*)ep) + n);
}
This casts the pointer to your struct to a pointer to an array of integers, then accesses the nth element of that array. This works only if every member of your struct is a full int (it does not apply to bit-field members, which do not each occupy a whole int). Note that this will fail horribly if you ever have a non-int member.
A more general solution would be to maintain an array of field offsets:
/* Byte offsets of the fields x, y and z inside struct Element. */
int offsets[3];

/* Fills offsets[] by measuring how far each field sits from the start of a
   scratch struct Element. */
void initOffsets()
{
    struct Element e;

    /* Subtract as char pointers: the result is a byte count, and no address
       is squeezed into an int (which loses bits wherever pointers are wider
       than int, e.g. on typical 64-bit targets). */
    offsets[0] = (int)((char *)&e.x - (char *)&e);
    offsets[1] = (int)((char *)&e.y - (char *)&e);
    offsets[2] = (int)((char *)&e.z - (char *)&e);
}
/* Returns the int stored offsets[n] bytes from the start of *ep.
   n is used as an index into offsets[] and is not range-checked. */
int getval(struct Element *ep, int n)
{
    /* Step in bytes through a char pointer; casting the address to int would
       truncate it wherever pointers are wider than int. */
    return *(int *)((char *)ep + offsets[n]);
}
This will work in the sense that you'll be able to call getval for any of the int fields of your struct, even if you have other non-int fields in your struct, since the offsets will all be correct. However, if you tried to call getval on one of the non-int fields it would return a completely wrong value.
Of course, you could write a different function for each data type, e.g.
/* Returns the double stored offsets[n] bytes from the start of *ep.
   n is used as an index into offsets[] and is not range-checked; the field
   at that offset must really be a double. */
double getDoubleVal(struct Element *ep, int n)
{
    /* Step in bytes through a char pointer; casting the address to int would
       truncate it wherever pointers are wider than int. */
    return *(double *)((char *)ep + offsets[n]);
}
and then just call the proper function for whichever datatype you'd want. Incidentally, if you were using C++ you could say something like
// Returns the T stored offsets[n] bytes from the start of *ep.
// n is used as an index into offsets[] and is not range-checked; the field at
// that offset must really be of type T.
template<typename T>
T getval(struct Element *ep, int n)
{
    // Step in bytes through a char pointer; casting the address to int would
    // truncate it wherever pointers are wider than int.
    return *((T*)((char*)ep + offsets[n]));
}
and then it would work for whatever datatype you'd want.
If your struct was anything except bitfields, you could just use array access, if I'm right in remembering that C guarantees that a series of members of a struct all of the same type, has the same layout as an array. If you know which bits in what order your compiler stores bitfields into integer types, then you could use shift/mask ops, but that's then implementation-dependent.
If you want to access bits by variable index, then it's probably best to replace your bitfields with an integer containing flag bits. Access by variable really isn't what bitfields are for: a1 ... an are basically independent members, not an array of bits.
You could do something like this:
struct Element {
unsigned int a1 : 1;
unsigned int a2 : 1;
...
unsigned int an : 1;
};
typedef unsigned int (*get_fn)(const struct Element*);
#define DEFINE_GETTER(ARG) \
unsigned int getter_##ARG (const struct Element *ep) { \
return ep-> a##ARG ; \
}
DEFINE_GETTER(1);
DEFINE_GETTER(2);
...
DEFINE_GETTER(N);
get_fn jump_table[n] = { getter_1, getter_2, ... getter_n};
int getval(struct Element *ep, int n) {
return jump_table[n-1](ep);
}
And some of the repetition could be avoided by the trick where you include the same header multiple times, each time having defined a macro differently. The header expands that macro once for each 1 ... N.
But I'm not convinced it's worth it.
It does deal with JaredPar's point that you're in trouble if your struct mixes different types - here all the members accessed via a particular jump table must of course be of the same type, but they can have any old rubbish in between them. That still leaves the rest of JaredPar's points, though, and this is a lot of code bloat for really no benefit compared with the switch.
No, there is no simple way to do this easier. Especially for bitfields, that are hard to access indirectly through pointers (you cannot take the address of a bitfield).
You can of course simplify that function to something like this:
/* Maps a 1-based field index to the matching member of *ep.
   Returns -1 when n names no known field. */
int getval(const struct Element *ep, int n)
{
    if (n == 1) {
        return ep->a1;
    }
    if (n == 2) {
        return ep->a2;
    }
    /* And so on ... */

    /* Indicates illegal field index. */
    return -1;
}
And it seems obvious how the implementation can be further simplified by using a preprocessor macro that expands to the case-line, but that's just sugar.
If the structure really is as simple as described, you might use a union with an array (or a cast to an array) and some bit-access magic (as in How do you set, clear and toggle a single bit in C?).
As Jared says, the general case is hard.
I think your real solution is to not use bitfields in your struct, but instead define either a set type or a bit array.
I suggest code generation. If your structures don't contain huge amount of fields you can auto generate routines for each field or for a range of fields
and use them like:
val = getfield_aN( myobject, n );
or
val = getfield_foo( myobject );
If you have
Only bitfields, or all the bitfields first in your struct
less than 32 (or 64) bitfields
then this solution is for you.
#include <stdio.h>
#include <stdint.h>
/* Four one-bit flags packed into a single unsigned int storage unit. */
struct Element {
    unsigned int a1 : 1;
    unsigned int a2 : 1;
    unsigned int a3 : 1;
    unsigned int a4 : 1;
};

#define ELEMENT_COUNT 4 /* the number of bit fields in the struct */

/*
 * Returns the value (0 or 1) of the n-th bit field of *ep, counting from 1,
 * or -1 when n lies outside 1..ELEMENT_COUNT.
 *
 * The fields are read by viewing the struct as one 32-bit number, so which
 * field lands on which bit follows the compiler's bit-field layout.
 */
int getval(struct Element* ep, int n)
{
    /* overlay that lets the leading bit fields be read as a plain number */
    union {
        struct Element el;
        uint32_t bits;
    } view;

    if (n < 1 || n > ELEMENT_COUNT) {
        return -1;
    }

    view.el = *ep;
    /* move bit n-1 down to position 0 and isolate it */
    return (int)((view.bits >> (n - 1)) & 1u);
}
int main(int argc, char** argv)
{
int i;
struct Element el;
el.a1 = 0;
el.a2 = 1;
el.a3 = 1;
el.a4 = 0;
for(i = 1; i <= ELEMENT_COUNT; ++i) {
printf("el.a%d = %d\n", i, getval(&el, i));
}
printf("el.a%d = %d\n", 8, getval(&el, 8));
return 0;
}
Based on eli-courtwright solution but without using array of field offsets
......
if you have a structure containing pointer field like this, maybe you could write:
struct int_pointers
{
int *ptr1;
int *ptr2;
long *ptr3;
double *ptr4;
std::string * strDescrPtr;
};
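Then, on a platform where pointers are 4 bytes wide, each pointer member sits at a multiple of 4 bytes from the start of the structure (in general, a multiple of the pointer size, sizeof(int *)), so you can write: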
struct int_pointers ptrs;
int i1 = 154;
int i2 = -97;
long i3 = 100000;
double i4 = (double)i1/i2;
std::string strDescr = "sample-string";
ptrs.ptr1 = &i1;
ptrs.ptr2 = &i2;
ptrs.ptr3 = &i3;
ptrs.ptr4 = &i4;
ptrs.strDescrPtr = &strDescr;
then, for example, for a int value you can write:
/*
 * Returns the int reached through the pointer member that is stored
 * intByteOffset bytes from the start of *ep.
 *
 * intByteOffset must be the byte offset of an int * member (0 for the first
 * one, sizeof(int *) for one placed directly after it, and so on). It is not
 * validated.
 */
int GetIntVal (struct int_pointers *ep, int intByteOffset)
{
    /* Walk in bytes from the start of the struct and load the stored int *.
       The address is never narrowed to int, so this also works where
       pointers are wider than int. */
    int *intValuePtr = *(int **)((char *)ep + intByteOffset);
    return *intValuePtr;
}
Calling it by:
int intResult = GetIntVal(&ptrs,0) //to retrieve the first int value in ptrs structure variable
int intResult = GetIntVal(&ptrs,4) //to retrieve the second int value in ptrs structure variable
and so on for the others structure fields values (writing other specific functions and using correct bytes offset value (multiple of 4)).
Although the OP specifies that we shouldn't care about the contents of the struct, since they are just bitfields would it be possible to use a char or int (or whatever data type has the size required) to create an n-bit "array" in this case?
/*
 * Sets bit n of the byte at *array (bit 0 is the least significant) and
 * leaves every other bit as it was.
 *
 * n is expected to be a valid bit index for one char (0..7 on the usual
 * 8-bit char); it is not range-checked.
 */
void writebit(char *array, int n)
{
    /* build the mask unsigned so bit 7 does not depend on char signedness */
    unsigned char mask = (unsigned char)(1u << n);

    /* OR the bit in; AND-ing with the mask would wipe all the other bits and
       could never turn a bit on */
    *array = (char)((unsigned char)*array | mask);
}
with the char types replaced with a larger type if a longer "array" was needed. Not sure this is a definitive solution in other structs but it should work here, with a similar readbit function.
If you want to access your structure using both element index:
int getval(struct Element *ep, int n)
and by name:
ep->a1
then you are stuck with some hard to maintain switch like method that everyone has suggested.
If, however, all you want to do is access by index and never by name, then you can be a bit more creative.
First off, define a field type:
typedef struct _FieldType
{
int size_in_bits;
} FieldType;
and then create a structure definition:
FieldType structure_def [] = { {1}, {1}, {1}, {4}, {1}, {0} };
The above defines a structure with five elements of size 1, 1, 1, 4 and 1 bits. The final {0} marks the end of the definition.
Now create an element type:
typedef struct _Element
{
FieldType *fields;
} Element;
To create an instance of an Element:
Element *CreateElement (FieldType *field_defs)
{
/* calculate number of bits defined by field_defs */
int size = ?;
/* allocate memory */
Element *element = malloc (sizeof (Element) + (size + 7) / 8); /* replace 7 and 8 with bits per char */
element->fields = field_defs;
return element;
}
And then to access an element:
int GetValue (Element *element, int field)
{
/* get number of bits in fields 0..(field - 1) */
int bit_offset = ?;
/* get char offset */
int byte_offset = sizeof (Element) + bit_offset / 8;
/* get pointer to byte containing start of data */
char *ptr = ((char *) element) + byte_offset;
/* extract bits of interest */
int value = ?;
return value;
}
Setting values is similar to getting values, only the final part needs changing.
You can enhance the above by extending the FieldType structure to include information about the type of value stored: char, int, float, etc, and then write accessors for each type which checks the required type against the defined type.
Why not build getval() in to the struct?
struct Whang {
int a1;
int a2;
int getIth(int i) {
int rval;
switch (i) {
case 1: rval = a1; break;
case 2: rval = a2; break;
default : rval = -1; break;
}
return rval;
}
};
int _tmain(int argc, _TCHAR* argv[])
{
Whang w;
w.a1 = 1;
w.a2 = 200;
int r = w.getIth(1);
r = w.getIth(2);
return 0;
}
getIth() would have knowledge of the internals of Whang, and could deal with whatever it contained.

Resources