parsing a string into tokens without strtok/lexer - c

I want to parse a string into an array of tokens . '\n' and ';' are delimiters , for e.g. :
hello;hello
world
should be converted to an array containing: {"hello","hello","world"}.
I tried many different methods for doing this and always I fail (since it needs a dynamic array of char * I have trouble with implementing it).
Please note that I cannot use strtok or lexical analyzer.
How may I do this ? Any points ?
EDIT : here is one of methods I tried to use but I get segmentation fault (maybe a memory access issue somewhere in my code) :
#include <stdio.h>
#include <malloc.h>
#include <fcntl.h>
#include <string.h>
typedef struct {
int fd;
char *path;
int size;
char *mem;
struct stat st;
} file;
file *readfile(char *path) {
file *a=malloc(sizeof(file));
a->path=path;
a->fd=open(a->path,O_RDONLY);
if(a->fd<0) return 0;
fstat(a->fd,&a->st);
a->size=a->st.st_size;
a->mem=malloc(a->size);
read(a->fd,a->mem,a->size);
return a;
}
void releasefile(file *a) {
free(a->mem);
close(a->fd);
free(a);
}
char **parse(int *w,file *a) {
int i,j=0;
w=0;
for(i=0;i<=a->size;i++) {
if(a->mem[i]=='\n' || a->mem[i]==';') { a->mem[i]='\0'; j++; }
}
char **out=malloc(sizeof(char *)*j);
for(i=0;i<=a->size;i++) {
if(a->mem[i-1]!='\0') continue;
out[*w]=malloc(strlen(a->mem+i)+1);
memcpy(out[*w],a->mem+i,strlen(a->mem+i)+1);
w++;
return out;
}
int main(int argc,char **argv) {
file *a=readfile(argv[1]);
int *w=malloc(sizeof(int));
char **tokens=parse(w,a);
int i;
for(i=0;i<=*w;i++) {
puts(tokens[i]);
}
releasefile(a);
// ATM no need to check for mem leaks :)
}
Algorithm description : read file, put \0 where you see a delimiter, start and push tokens seprated by \0 into an array.

What has happened to computer science?
Anyway write a FSA - http://en.wikipedia.org/wiki/Finite-state_machine
Can do this using a table

Related

Stack smashing detected in C - why does this happen?

I have the following function, which, given a string, should find the most recurrent couple of letters in it and store the result in a different string.
For example - for the string "ababa", the most recurrent couple would be "ba", and for "excxexd" it would be "ex". This is the code:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
void printError(){
printf("Error: please check your input\n");
}
bool isLexicographicallyPreceding(char couple1[], char couple2[])
{
if (strcmp(couple1, couple2)>=0) return true;
return false;
}
void coupleDetector(int length, char word[], char result[])
{
char couples[length-1][2];
for (int i=0; i<length-1; i++)
{
char couple[2] = {word[i], word[i+1]};
strcpy(couples[i], couple);
}
char element[]="";
int count=0;
for (int j=0; j<length-1; j++)
{
char tempElement[2];
strcpy(tempElement,couples[j]);
int tempCount=0;
for (int p=0; p<length-1; p++)
{
if (couples[p]==tempElement) tempCount++;
}
if (tempCount>count)
{
strcpy(element, tempElement);
count=tempCount;
}
if (tempCount==count)
{
if (isLexicographicallyPreceding(tempElement,element) == true) strcpy(element, tempElement);
}
}
strcpy(result,element);
}
int main() {
//Supposed to print "ba" but instead presents "stack smashing detected".
int length=5;
char arr[] = "ababa";
char mostCommonCouple[2];
coupleDetector(length,arr,mostCommonCouple);
printf("%s", mostCommonCouple);
return 0;
}
The code compiles without errors, but for some reason does not work as intended but prints out "stack smashing detected". Why would that be? Advices would be very helpful.
Thanks.
In trying out your program, I found a few of your character arrays undersized. Character arrays (strings) need to be sized large enough to also include the null terminator value in the array. So in many locations, having a two-character array size is not sufficient and was the cause of the stack smashing. With that in mind, following is a refactored version of your program.
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
void printError()
{
printf("Error: please check your input\n");
}
bool isLexicographicallyPreceding(char couple1[], char couple2[])
{
if (strcmp(couple1, couple2)>=0) return true;
return false;
}
void coupleDetector(int length, char word[], char result[])
{
char couples[length-1][3];
for (int i=0; i<length-1; i++)
{
char couple[3] = {word[i], word[i+1], '\0'};
strcpy(couples[i], couple);
}
char element[3]; /* Define the character array */
strcpy(element, ""); /* Then initialize it if need be */
int count=0;
for (int j=0; j<length-1; j++)
{
char tempElement[3];
strcpy(tempElement,couples[j]);
int tempCount=0;
for (int p=0; p<length-1; p++)
{
if (couples[p]==tempElement) tempCount++;
}
if (tempCount>count)
{
strcpy(element, tempElement);
count=tempCount;
}
if (tempCount==count)
{
if (isLexicographicallyPreceding(tempElement,element)) strcpy(element, tempElement);
}
}
strcpy(result,element);
}
int main()
{
//Supposed to print "ba" but instead presents "stack smashing detected".
int length=5;
char arr[] = "ababa";
char mostCommonCouple[3]; /* Notice size requirement to also contain the '\0' terminator */
coupleDetector(length,arr,mostCommonCouple);
printf("%s\n", mostCommonCouple);
return 0;
}
Here are some key points.
Viewing the code, most sizes for arrays was enlarged by one to accommodate storage of the null terminator.
Work fields such as "element" need to be defined to their proper size so that subsequent usage won't also result in stack smashing.
Testing out the refactored code resulted in the following terminal output.
#Vera:~/C_Programs/Console/Recurrent/bin/Release$ ./Recurrent
ba
So to reiterate, be cognizant that character arrays normally need to be defined to be large enough to contain the largest expected string plus one for the null terminator.
Give that a try and see if it meets the spirit of your project.

C programming : Creating substring

I'm new in C and I need some explanation on what I am doing wrong.
I'm trying to iterate over a string and find the first '\' then make a substring from that place in the array.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main() {
struct info{
char* name;
char* type;
char* path;
};
struct info user1;
char* a = "/home/users/user1";
for (int i = strlen(a) ; i < 0 ; i--) {
printf("%d",i);
if(strcmp(a[i],'/')==0){
strncpy(a,user1.name,i);
break;
}
}
return 0;
}
There are many errors I will explain them one by one. The code will be something like this.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(void) {
struct info{
char* name;
char* type;
char* path;
};
struct info user1;
user1.name = malloc(40);
if( user1.name == NULL){
fprintf(stderr, "%s\n","Error in malloc" );
exit(1);
}
const char* a = "/home/users/user1";
for(int i = strlen(a) -1; i >= 0 ; i--) {
if(a[i]=='/'){
strncpy(user1.name,a+i+1,i);
user1.name[i]='\0';
break;
}
}
printf("%s\n",user1.name );
free(user1.name);
return 0;
}
Things you did wrong
There was no memory allocated to name it was simply an uninitialized pointer. Here we have allocated memory to it.
Second thing, strcmp as the name suggests compares null terminated char array not char-s. It can be done with simple == operator.
The copy part is modified to only copy the user name part nothing else. That's why we have incremented the pointer to point to the correct position.
You forgot to check the return value of malloc and then you should free the allocated memory.
Also you can't modify a string literal because it stays in non-modifiable portion of the memory.
try this,
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main() {
struct info{
char* name;
char* type;
char* path;
};
struct info user1;
user1.name = malloc(10);
char* a = "/home/users/user1";
int len=strlen(a);
for (int i = 0; i < len; i++) {
printf("%d",i);
if(a[i]=='/'){
strncpy(user1.name,a+i+1,i);
user1.name[i]='\0';
break;
}
}
return 0;
}

C : Program prints weird stuff in output, after deleting printf from char function [duplicate]

This question already has answers here:
How to access a local variable from a different function using pointers?
(10 answers)
Closed 5 years ago.
I was writing a program in c for vignere cipher, when in a function to generate the key as the same length of input name,i encountered a bug where if i remove the "printf" line displaying the length of input string it, prints weird stuff on the sceen, it happens only when i delete that "printf" line from the GenKey() function.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *GenKey(char *key, char *source){
int i=0,j=0;
char ReturnKey[strlen(source)];
printf("%d\n",strlen(source)); // THIS LINE HERE CAUSES PROBLEM
for(i=0;i<strlen(source)-1;i++){
if(j==strlen(key)){
j=0;
}
ReturnKey[i]=key[j];
j++;
}
return ReturnKey;
}
int main()
{
int i;
char name[10000];
char container[10000];
char VigKey[]="INFERNO";
char *NamePtr;
char *KeyPtr;
printf("give a name: ");
fgets(name,10000,stdin);
char GeneratedKey[strlen(name)];
KeyPtr=VigKey;
NamePtr=name;
strcpy(GeneratedKey,GenKey(KeyPtr,NamePtr));
printf("%s",GeneratedKey);
}
Output(Before deleting that line):
give a name: ATTACKATDAWN
13
INFERNOINFER
Now i delete that line
char *GenKey(char *key, char *source){
int i=0,j=0;
char ReturnKey[strlen(source)];
// NOW I HAVE DELETED THAT LINE
for(i=0;i<strlen(source)-1;i++){
if(j==strlen(key)){
j=0;
}
ReturnKey[i]=key[j];
j++;
}
return ReturnKey;
}
Output(After deleting that line):
give a name: ATTACKATDAWN
INFERNOINFERα╫`
Try creating the ReturnKey character array on the heap with malloc like this:
char *GenKey(char *key, char *source){
int i=0,j=0;
char *ReturnKey = malloc(sizeof(char) * strlen(source));
for(i=0;i<strlen(source)-1;i++){
if(j==strlen(key)){
j=0;
}
ReturnKey[i]=key[j];
j++;
}
return ReturnKey;
}
Before when you were creating ReturnKey, you were doing so as a local variable which only lives in the context of that function. Even though you would see your word still there, that was just because it was still in that position in memory but was no longer being referencing by an object.
When you create a dynamic array with a function like malloc, you are creating it on the so called "heap" which doesn't get released when it goes out of scope, this way you can return it from the function (you actually return a pointer to that location in memory).
When using malloc note that the memory IS NOT RELEASED, and thus you must at some point later release it yourself by calling free otherwise you will leak memory.
This is what the full code might look like:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *GenKey(char *key, char *source){
int i=0, j=0;
// malloc takes the size of the data type * the length
char *ReturnKey = malloc(sizeof(char) * strlen(source));
for(i=0;i<strlen(source)-1;i++){
if(j==strlen(key)){
j=0;
}
ReturnKey[i]=key[j];
j++;
}
return ReturnKey;
}
int main()
{
int i;
char name[10000];
char container[10000];
char VigKey[]="INFERNO";
char *NamePtr;
char *KeyPtr;
printf("give a name: ");
fgets(name,10000,stdin);
KeyPtr=VigKey;
NamePtr=name;
char *GeneratedKey = GenKey(KeyPtr,NamePtr);
printf("%s",GeneratedKey);
free(GeneratedKey); // IMPORTANT!!!
}
Here is a more in depth article on using malloc and free:
https://www.codingunit.com/c-tutorial-the-functions-malloc-and-free

How to make a copy of a string and return its address, assign that to a pointer and print the new string in C? [duplicate]

This question already has answers here:
Passing address of array as a function parameter
(6 answers)
Closed 9 years ago.
I'm writing a function that gets a string, allocates memory on the heap that's enough to create a copy, creates a copy and returns the address of the beginning of the new copy.
In main I would like to be able to print the new copy and afterwards use free() to free the memory. I think the actual function works although I am not the char pointer has to be static, or does it?
The code in main does not work fine...
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
int make_copy(char arr[]);
int main()
{
char arrr[]={'a','b','c','d','e','f','\0'};
char *ptr;
ptr=make_copy(arrr);
printf("%s",ptr);
getchar();
return 0;
}
int make_copy(char arr[])
{
static char *str_ptr;
str_ptr=(char*)malloc(sizeof(arr));
int i=0;
for(;i<sizeof str_ptr/sizeof(char);i++)
str_ptr[i]=arr[i];
return (int)str_ptr;
}
OK, so based on the comments. A revised version:
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
char* make_copy(char arr[]);
int main()
{
char arrr[]={"abcdef\0"};
char *ptr=make_copy(arrr);
printf("%s",ptr);
getchar();
return 0;
}
char* make_copy(char arr[])
{
static char *str_ptr;
str_ptr=(char*)malloc(strlen(arr)+1);
int i=0;
for(;i<strlen(arr)+1;i++)
str_ptr[i]=arr[i];
return str_ptr;
}
Or even better:
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
char* make_copy(char arr[]);
int main()
{
char arrr[]={"abcdef\0"};
printf("%s",make_copy(arrr));
getchar();
return 0;
}
char* make_copy(char arr[])
{
char *str_ptr;
str_ptr=(char*)malloc(strlen(arr)+1);
return strcpy(str_ptr,arr);
}
You're on the right track, but there are some issues with your code:
Don't use int when you mean char *. That's just wrong.
Don't list characters when defining a string, write char arrr[] = "abcdef";
Don't scale string alloations by sizeof (char); that's always 1 so it's pointless.
Don't re-implement strcpy() to copy a string.
Don't cast the return value of malloc() in C.
Don't make local variables static for no reason.
Don't use sizeof on an array passed to a function; it doesn't work. You must use strlen().
Don't omit including space for the string terminator, you must add 1 to the length of the string.
UPDATE Your third attempt is getting closer. :) Here's how I would write it:
char * make_copy(const char *s)
{
if(s != NULL)
{
const size_t size = strlen(s) + 1;
char *d = malloc(size);
if(d != NULL)
strcpy(d, s);
return d;
}
return NULL;
}
This gracefully handles a NULL argument, and checks that the memory allocation succeeded before using the memory.
First, don't use sizeof to determine the size of your string in make_copy, use strlen.
Second, why are you converting a pointer (char*) to an integer? A char* is already a pointer (a memory address), as you can see if you do printf("address: %x\n", ptr);.
sizeof(arr) will not give the exact size. pass the length of array to the function if you want to compute array size.
When pass the array to function it will decay to pointer, we cannot find the array size using pointer.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
char *strdup(const char *str)
{
char *s = (char*)malloc(strlen(str)+1);
if (s == NULL) return NULL;
return strcpy(s, str);
}
int main()
{
char *s = strdup("hello world");
puts(s);
free(s);
}
Points
~ return char* inside of int.
~ you can free the memory using below line
if(make_copy!=NULL)
free(make_copy)
Below is the modified code.
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
char* make_copy(char arr[]);
int main()
{
char arrr[]={'a','b','c','d','e','f','\0'};
char *ptr;
ptr=make_copy(arrr,sizeof(arrr)/sizeof(char));
printf("%s",ptr);
printf("%p\n %p",ptr,arrr);
getchar();
return 0;
}
char* make_copy(char arr[],int size)
{
char *str_ptr=NULL;
str_ptr=(char*)malloc(size+1);
int i=0;
for(;i<size;i++)
str_ptr[i]=arr[i];
str_ptr[i]=0;
return str_ptr;
}

c fwrite() writing to a file with only one of the struct variable?

This function is supposed to get a parameter as the pointer of a file and put all file into the struct anagram, then write it to another file. Right now the data only contains a.word, but it suppose to containst a.sorted too? I have check the a.sorted using printf
and it printf out the correct data, but why its not writing to the data file?
It still cant get the a.sorted even if i increase the count of the frwite
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>
#include "anagrams.h"
#define SIZE 80
//struct
struct anagram {
char word[SIZE];
char sorted[SIZE];
};
void buildDB ( const char *const dbFilename ){
FILE *dict, *anagramsFile;
struct anagram a;
//check if dict and anagram.data are open
errno=0;
dict= fopen(dbFilename, "r");
if(errno!=0) {
perror(dbFilename);
exit(1);
}
errno=0;
anagramsFile = fopen(anagramDB,"wb");
char word[SIZE];
char *pos;
int i=0;
while(fgets(word, SIZE, dict) !=NULL){
//get ripe of the '\n'
pos=strchr(word, '\n');
*pos = '\0';
strncpy(a.word,word,sizeof(word));
//lowercase word
int j=0;
while (word[j])
{
tolower(word[j]);
j++;
}
/* sort array using qsort functions */
qsort(word,strlen(word), 1, charCompare);
strncpy(a.sorted,word,sizeof(word));
//printf(a);
fwrite(&a,1,strlen(word)+1,anagramsFile);
i++;
}
fclose(dict);
fclose(anagramsFile);
}
it suppose to contains data with a.sorted for example "10th 01ht"
data:
fwrite(&a,1,strlen(word)+1,anagramsFile); should have been fwrite(a.sorted,1,strlen(a.sorted)+1,anagramsFile); I assume the declaration of sorted as char sorted[SOME_LEN];

Resources