I'm struggling with this split function based on C. After returning the struct by reference from the strSplit() function the token handle seems wrong. The variable sd->tokens is the right address, but I cannot get the tokens. But they are correct because inside the function I can get it.
How can I resolve this and what is the reason for this behavior. All remaining variables in the struct are ok.
#include <stdio.h>
#include <string.h>
struct splitResult {
char *source;
long lSource;
int result;
char **tokens;
};
typedef struct splitResult splitResultStruct;
splitResultStruct * strSplit(char *source, char *delimiter);
int main(int argc, const char * argv[]) {
char *source = "part1.part2.part3";
splitResultStruct *sd;
sd = strSplit(source, ".");
printf("%d tokens found\n", sd->result);
for(int i=0; i<sd->result; i++) {
printf("%s\n", sd->tokens[i]);
}
return 0;
}
splitResultStruct * strSplit(char *source, char *delimiter) {
// Defines the result struct
splitResultStruct sData, *sDataPtr;
sDataPtr = &sData;
sData.source = source;
// Gets the length of source string
sData.lSource = strlen(source);
// Don't split if empty string is given
if(sData.lSource == 0) {
sData.result = -1;
return sDataPtr;
}
// Allocate memory according teh size of source string
char data[sData.lSource];
// Copy the source into the allocated memory
strcpy(data,source);
// Just count the tokens
char *token = strtok(data, delimiter);
int tc = 0;
while (token != NULL)
{
token = strtok(NULL, delimiter);
tc++;
}
if(tc == 0) {
sData.result = -1;
return sDataPtr;
}
// Defines an array of char pointer with the dimension of the number of tokens
sData.result = tc;
char *tokens[tc];
// Resets the token engine
strcpy(data,source);
// Strip out the first token found
token = strtok(data, delimiter);
tokens[0] = token;
tc = 0;
while (token != NULL)
{
// Strip out one token and store them into the token array
token = strtok(NULL, delimiter);
tc++;
tokens[tc] = token;
}
sData.tokens = tokens;
for(int i=0; i<sData.result; i++) {
printf("%s\n", sData.tokens[i]);
}
return sDataPtr;
}
Remember, never return a pointer to local variable.
Local variables are destoryed when the function returns.
Try:
sDataPtr = malloc(sizeof(splitResultStruct));
Or:
static splitResultStruct sData;
splitResultStruct* sDataPtr;
Ok, guys, got it regarding the struct memory. Changed the code to this version, so the struct will be declared in main and a pointer is passed. The functions returns nothing but changes the struct itself. But the tokens remains empty. Not a clue...
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
struct splitResult {
char *source;
long lSource;
int result;
long tSize;
char **tokens;
};
typedef struct splitResult splitResultStruct;
void strSplit(splitResultStruct *, char *source, char *delimiter);
int main(int argc, const char * argv[]) {
char *source = "part1.part2.part3";
splitResultStruct sd;
strSplit(&sd, source, ".");
printf("%d tokens found\n", sd.result);
for(int i=0; i<sd.result; i++) {
printf("%s\n", sd.tokens[i]);
}
return 0;
}
void strSplit(splitResultStruct *sData, char *source, char *delimiter) {
sData->source = source;
// Gets the length of source string
sData->lSource = strlen(source);
// Don't split if empty string is given
if(sData->lSource == 0) {
sData->result = -1;
}
// Allocate memory according teh size of source string
char data[sData->lSource];
// Copy the source into the allocated memory
strcpy(data,source);
// Just count the tokens
char *token = strtok(data, delimiter);
int tc = 0;
while (token != NULL)
{
token = strtok(NULL, delimiter);
tc++;
}
if(tc == 0) {
sData->result = -1;
}
// Defines an array of char pointer with the dimension of the number of tokens
sData->result = tc;
char *tokens[tc];
// Resets the token engine
strcpy(data,source);
// Strip out the first token found
token = strtok(data, delimiter);
tokens[0] = token;
tc = 0;
while (token != NULL)
{
// Strip out one token and store them into the token array
token = strtok(NULL, delimiter);
tc++;
tokens[tc] = token;
}
sData->tokens = tokens;
for(int i=0; i<sData->result; i++) {
printf("%s\n", sData->tokens[i]);
}
}
Result:
Part1
Part2
Part3
3 tokens found
(null)
(null)
(null)
Program ended with exit code: 0
Related
The program prints all the outputs from the file I expect it to if I comment out the second line however if I re-add it the tokens reach null earlier and only 2 words from the file are printed any problems I'm missing?
printf("%s\n",texttoken);
fprintf(resultptr,"%s ",filterer(redwords,lowercase(texttoken)));
The rest of the code is below.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void main(){
char *filterer(char* redwords, char* word);
char *lowercase(char *word);
FILE *redptr;
FILE *textptr;
FILE *resultptr;
char *redwords = malloc(20);
char *text = malloc(255);
char *texttoken;
char *temp;
redptr = fopen("redfile.txt", "r");
textptr = fopen("textfile.txt", "r");
resultptr = fopen("result.txt", "w");
fgets(redwords,20,redptr);
redwords = lowercase(redwords);
fgets(text,255,textptr);
texttoken = strtok(text, " ");
while(texttoken != NULL){
printf("%s\n",texttoken);
fprintf(resultptr,"%s ",filterer(redwords,lowercase(texttoken)));
texttoken = strtok(NULL, " ");
}
}
char *filterer(char *redwords, char *word){
int match = 0;
char *token;
token = strtok(redwords, ",");
while(token != NULL) {
if(strcmp(word,token)==0){
match = 1;
}
token = strtok(NULL, ",");
}
if(match == 1){
int i;
int len = strlen(word);
char modified[len+1];
modified[len] = NULL;
for(i=0; i<len; i++){
modified[i] = '*';
}
return modified;
}
return word;
}
char *lowercase(char *word){
int i;
for(i=0; i<=strlen(word); i++){
if(word[i]>=65&&word[i]<=90)
word[i]=word[i]+32;
}
return word;
}
At least these problems:
Return of invalid pointer
return modified; returns a pointer to a local array. Local arrays become invalid when the function closes.
char modified[len+1];
modified[len] = NULL;
for(i=0; i<len; i++){
modified[i] = '*';
}
return modified; // Bad
Save time: Enable all warnings
Example: warning: function returns address of local variable [-Wreturn-local-addr]
Nested use of strtok()
Both this loop and filterer() call strtok(). That nested use is no good. Only one strtok() should be active at a time.
while(texttoken != NULL){
printf("%s\n",texttoken);
fprintf(resultptr,"%s ",filterer(redwords,lowercase(texttoken)));
texttoken = strtok(NULL, " ");
}
Since filterer() is only looking for a ',', look to strchr() as a replacement.
I'm not sure where I'm messing up so I've given a summary of each function so my logic can be checked!
The main program takes arguments from the command line and stores them in char pointer array.
The correct command to run program is ./re-do_hw4_prob6 filename. (filename is sears_kmart_stores_closing_2019.txt in this case)
After checking if argument number is correct, the file is opened.
A while loop copies strings of text from file to buffer until NULL is met.
Then the function getState() is called. The state is printed.
The file is closed.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "redo_hw4_functs.h"
int main(int argc, char* argv[])
{
char** states;
FILE* pFile;
char buffer[80];
int i = 0;
if(argc < 2){
printf("Too few arguments! \n");
}
else if(argc > 2){
printf("Too many arguments! \n");
}
pFile = fopen(argv[1], "r");
states = malloc(50*sizeof(char));
for (i = 0; i < 50; i++)
{
states[i] = malloc(3*sizeof(char));
while(fgets(buffer, sizeof(buffer), pFile) != NULL)
{
getState(states[i], buffer);
printf("State: %s \n", states[i]);
}
}
fclose(pFile);
}
The getState() function takes in two char arrays. One to read from the other to copy too.
It tokenizes the string being read from using a comma, a tab, and a new line as the delimiters. -> ",\t\n"
On the last token it copies the last two chars to the empty string array.
//accepts a line of string formatted as expected and stores the store state in char file ¡OJO! This is the hardest one because you cant rely on delimeters alone to find state
void getState(char strState[], char strLine[])
{
int i;
char* token;
char delim[] = ",\t\n";
token = strtok(strLine, delim);
token = strtok(strLine, delim);
token = strtok(strLine, delim);
for(i = (strlen(token) - 2); i < strlen(token); i++)
{
strState[i] =token[i];
}
}
I have also included my other functions to see if there are any other mistakes to be corrected.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "redo_hw4_functs.h"
//accepts a line of string formatted as expected and stores the store name in char file
void getName(char strName[], char strLine[])
{
char* token;
char delim[] = " ,\t\n";
token = strtok(strLine, delim);
while(token != NULL)
{
if(strcmp(token, "sears") == 0 || strcmp(token, "kmart"))
{
strcpy(strName, token);
break;
}
token = strtok(NULL, delim);
}
}
//accepts a line of string formatted as expected and stores the store address in char file
void getAddress(char strAddress[], char strLine[])
{
char* token;
char delim[] = ",\t\n";
token = strtok(strLine, delim);
while(token != NULL)
{
if(isdigit(token[0]) && isalpha(token[sizeof(token)-1]))
{
strcpy(strAddress, token);
break;
}
token = strtok(NULL, delim);
}
}
//accepts a line of string formatted as expected and stores the store city in char file
void getCity(char strCity[], char strLine[])
{
int i;
char* token;
char delim[] = ",\t\n";
token = strtok(strLine, delim);
token = strtok(strLine, delim);
token = strtok(strLine, delim);
for(i = 0; i < (strlen(token) - 3); i++)
{
strcpy(strCity[i], token[i]);
}
}
//accepts a line of string formatted as expected and stores the store state in char file ¡OJO! This is the hardest one because you cant rely on delimeters alone to find state
void getState(char strState[], char strLine)
{
int i;
char* token;
char delim[] = ",\t\n";
token = strtok(strLine, delim);
token = strtok(strLine, delim);
token = strtok(strLine, delim);
for(i = (strlen(token) - 2); i < strlen(token); i++)
{
strcpy(strState[i], token[i]);
}
}
Here is an example of input text that is to be read:
Kmart, 217 Forks Of River Pkwy, Sevierville TN
Kmart, 4110 E Sprague Ave, Spokane WA
Kmart, 1450 Summit Avenue, Oconomowoc WI
Sears, 2050 Southgate Rd, Colorado Spgs CO
Sears, 1650 Briargate Blvd, Colorado Spgs CO
Sears, 3201 Dillon Dr, Pueblo CO
Here is an example of what the program is expected to be outputting:
State:TN
State:WA
State:WI
State:CO
State:CO
State:CO
Here is an example of what the program is outputting:
I assume that you want not only the status but also all the other fields so that you can deal with them later.
The code below may be quite different from yours, but I think that it is easier to use a single function to read each record.
The function read_data() reads data from the file pointer fp and store them in data, which is a pointer to a predefined struct data_t.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_SIZE 1024
typedef struct {
char name[64];
char addr[64];
char city[64];
char state[8];
} data_t;
int read_data(FILE *fp, data_t *data) {
char buffer[BUFFER_SIZE];
// Read a record. If end-of-file is read, return -1.
if (fgets(buffer, BUFFER_SIZE, fp) == NULL) {
return -1;
}
char delim[] = ",\t\n";
// Find the name of the record.
char *token = strtok(buffer, delim);
strcpy(data->name, token);
// Find the address of the record.
token = strtok(NULL, delim);
while (*token == ' ') {
++token;
}
strcpy(data->addr, token);
// Find the city and status of the record.
// We cannot split them by strtok() easily, so we handle it later.
token = strtok(NULL, delim);
while (*token == ' ') {
++token;
}
// Find the position of the state.
char *ptr = token;
while (*ptr != '\0') {
++ptr;
}
ptr -= 2;
strcpy(data->state, ptr);
// Use NULL to separate the city and the state so that we can use strcpy().
while (*(ptr - 1) == ' ') {
--ptr;
}
*ptr = '\0';
// Copy the city field.
strcpy(data->city, token);
return 0;
}
int main(int argc, char *argv[]) {
if (argc != 2) {
fprintf(stderr, "The number of arguments is incorrect.\n");
fprintf(stderr, "Usage: %s filename\n", argv[0]);
return -1;
}
FILE *fp = fopen(argv[1], "r");
data_t *data = malloc(sizeof(data_t));
while (read_data(fp, data) == 0) {
printf("State: %s\n", data->state);
}
free(data);
fclose(fp);
return 0;
}
The way to read data is hard-coded, so if the input format is different, you may need to change the content of read_data(), but it works well for your sample input.
I want to clean this static array of char* after using it . I want to read from file and split the lines gotten through fgets() into array of words return it and clean the buffer (static char *words). But this is my implementation of split() and i was wondering if the static char* words will not lead to memory leakage since i can't destroy it because i want to call it every time i am getting a line from a file :
#define MAX_LENGTH 10000
char** split(char* string)
{
static char* words[MAX_LENGTH / 2];
static int index = 0;
const char* delimiter = " ";
char* ptr = strtok(string, delimiter);
while (ptr != NULL)
{
words[index] = ptr;
ptr = strtok(NULL, delimiter);
++index;
}
index = 0;
return words;
}
int main()
{
char line[] = "yes you are good ";
char **splitted = split(line);
printf("%s\n", splitted[2]);
}
Please any idea?
You could reset it at the beginning of the function before reusing it. Basically you are clearing the previous entries in current call.
char* *split(char *string){
static char *words[MAX_LENGTH / 2];
static int index = 0;
//reset
for (int i=0; i < sizeof(words)/sizeof(words[0]); i++) {
words[i] = NULL;
}
const char *delimiter=" ";
char *ptr = strtok(string,delimiter);
while (ptr!=NULL)
{
words[index]= ptr;
ptr=strtok(NULL,delimiter);
++index;
}
index=0;
return words;
}
Hi, i am using the MinGW C Compiler with Code::Blocks and my code hangs when trying to print the contents of an array (well it is a custom data type).
For a quick summary: the program is taking the contents of a txt file and splits
the string up into individual words using a custom data type called a stringArray (the name explains itself). It then should print each word of the file to the user.
The problem is, it hangs and gives me the usual "[PROGRAM NAME HERE] is not responding." After pressing cancel it gives me this result:
Process returned -1073741819 (0xC0000005) execution time : 3.861 s
Press any key to continue.
I am a sort of beginner.
Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
typedef struct stringArray
{
char *string;
}stringArray;
const char delim[2] = " ";
int string_to_array(char *filecontents)
{
char *token;
token = strtok(filecontents, delim);
int i;
int dirtyContentsLength;
stringArray newContents[100];
for(i = 0; i < 100; i++)
{
newContents[i].string = "";
}
i = 0;
while (token != NULL)
{
newContents[i].string = token;
i++;
token = strtok(NULL, delim);
}
return newContents;
}
int open_file(char filename[30])
{
char *file_contents;
long input_file_size;
FILE *input_file = fopen(filename, "rb");
fseek(input_file, 0, SEEK_END);
input_file_size = ftell(input_file);
rewind(input_file);
file_contents = malloc(input_file_size * (sizeof(char)));
fread(file_contents, sizeof(char), input_file_size, input_file);
fclose(input_file);
return file_contents;
}
int lex(char filecontents[30])
{
char *tok = "";
int state = 0;
char *string = "";
}
int main(int argc, char *argv[] )
{
const char *cleanContents;
char *messyContents;
char input[30];
printf("What is the filename? ");
scanf("%s", input);
messyContents = open_file(input);
cleanContents = string_to_array(messyContents);
int contentsLength = sizeof(cleanContents) / sizeof(cleanContents[0]);
int i;
for(i = 0; i < contentsLength; i++)
{
printf("%s\n", cleanContents[i]);
}
printf("Done");
return 0;
}
You have multiple problems with your code:
string_to_array() is declared to return an int, but in reality it is returning a stringArray
Same with open_file() function, Declared to return an int, but actually returning a char*
string_to_array is returning an element that was declared locally. This means that once the function is returned, that memory is no longer valid, but it has passed it on to the caller.
Your structure name is misleading. A char* is a character array (a string). Thus the name charArray would be more appropriate. For the structure to be a string array it has to be a char**, ie an array of character arrays (array of strings)
Int the printf() in the main() function you are not passing the string (thus a compilation warning is generated)
You are not initializing memory to all 0. This is ideal as otherwise the memory will contain random data which will be interpreted as a string untill the first null terminator (\0 encountered)
The following code is a modified working version of what you are trying to achieve with comments about each change:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
typedef struct stringArray
{
char *string;
}stringArray;
const char delim[2] = " ";
// Now string_to_array takes the memory location to write output to as a first parameter so that the
// memory will reside in the callers scope (refer to problem 3 above)
// Additionally return type was now set to void (refer to problem 1)
void string_to_array(stringArray newContents[100], char *filecontents)
{
char *token;
token = strtok(filecontents, delim);
int i;
int dirtyContentsLength;
for(i = 0; i < 100; i++)
{
newContents[i].string = "";
}
i = 0;
while (token != NULL)
{
newContents[i].string = token;
i++;
token = strtok(NULL, delim);
}
// return now was removed. result written directly in memory passed as parameter by the caller.
}
// open_file changed to return a char* (refer to problem 2)
char* open_file(char filename[30])
{
char *file_contents;
long input_file_size;
FILE *input_file = fopen(filename, "rb");
fseek(input_file, 0, SEEK_END);
input_file_size = ftell(input_file);
rewind(input_file);
file_contents = malloc(input_file_size * (sizeof(char)));
fread(file_contents, sizeof(char), input_file_size, input_file);
fclose(input_file);
return file_contents;
}
int lex(char filecontents[30])
{
char *tok = "";
int state = 0;
char *string = "";
}
int main(int argc, char *argv[] )
{
stringArray cleanContents[100];
// Initializing memory to all 0s (refer to problem 6)
memset(cleanContents, 0 ,sizeof(cleanContents));
char *messyContents;
char input[30];
printf("What is the filename? ");
scanf("%s", input);
messyContents = open_file(input);
string_to_array(cleanContents, messyContents);
int contentsLength = sizeof(cleanContents) / sizeof(cleanContents[0]);
int i;
for(i = 0; i < contentsLength; i++)
{
// Checking that at least one character is present in the string before printing it...
if (cleanContents[i].string[0])
{
// Printing the string within the 'stringArray'. (refer to problem 5)
printf("%s\n", cleanContents[i].string);
}
}
printf("Done\n");
return 0;
}
Is there a better of parsing the below string instead of doing a strtok() to get each field.
"subject=what&cc=bose#yahoo.com&server=smtp.yahoo.com:8000"
Basically I want to retrieve the value for each field into another char buf's.
Here is my code. Just wanted to know if there is any other better way of doing it (any better string parsing algos)
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#define SUBJECT "subject="
#define CC_LIST "cc="
#define SERVER "server="
static void
get_value (const char *tok, char **rval_buf, size_t field_len)
{
size_t val_size = 0;
if (!tok || !rval_buf)
return;
val_size = strlen(tok + field_len) + 1;
*rval_buf = calloc(1, val_size);
if (*rval_buf) {
strlcpy(*rval_buf, tok + field_len, val_size);
}
}
int
main (int argc, char **argv)
{
/* hard coded buf for testing */
char buf[] = "subject=what&cc=bose#yahoo.com&server=smtp.yahoo.com:8000";
char *subject_text = NULL;
char *cc_list = NULL;
char *server_addr = NULL;
char *tok = NULL;
int field_len = 0;
int val_len = 0;
tok = strtok(buf, "&");
while(tok) {
/*
* Handle the token
*/
/* check if it is subject */
if (strstr(tok, SUBJECT)) {
get_value(tok, &subject_text, strlen(SUBJECT));
} else if (strstr(tok, CC_LIST)) { /* check if it is CC */
get_value(tok, &cc_list, strlen(CC_LIST));
} else if (strstr(tok, SERVER)) { /* check if it is server */
get_value(tok, &server_addr, strlen(SERVER));
}
tok = strtok(NULL, "&");
}
/* dump data */
fprintf(stdout, "\nSUBJECT: \"%s\"\nCC_LIST: \"%s\"\nSERVER: \"%s\" \n\n",
subject_text, cc_list, server_addr);
return EXIT_SUCCESS;
}
strstr searches for one string ("the needle") inside another ("the haystack"), but you really only want to know whether the needle is the beginning of the haystack.
Here's a small suggestion: (requires #include <stdbool> or change the booleans to ints. I like bools.)
static bool
getval(const char* haystack, const char** res, const char* needle, size_t len) {
if (haystack && 0 == strncmp(haystack, needle, len)) {
*res = strdup(haystack + len);
return true;
}
return false;
}
and later:
for (tok = strtok(buf, "&"); tok; tok = strtok(NULL, "&")) {
getval(tok, &subject_text, SUBJECT, strlen(SUBJECT)) ||
getval(tok, &cc_list, CC_LIST, strlen(CC_LIST)) ||
getval(tok, &server_addr, SERVER, strlen(SERVER));
}
You can actually get away with doing the strlen inside of getval, which cuts down a lot on the noise, because most modern compilers are clever enough to inline getval and constant-fold the length of a constant string.
Use strtok()
char *strtok(char *str, const char *delim)
You can put '&' as a delimeter
I wrote a quick-n-dirty splitter for you:
int split(char* input, char delim, char*** parts)
{
int count = 1;
char** result;
char* t = input;
while(*t != '\0')
{
if (*t++ == delim)
{
count++;
}
}
result = (char**)malloc(count * sizeof(char*));
t = input;
int i = 0;
result[i] = input;
while(*t != '\0')
{
if (*t == delim)
{
*t = '\0';
result[++i] = ++t;
}
else
{
t++;
}
}
*parts = result;
return count;
}
int main()
{
char raw[] = "subject=\"some text\"&cc=abcd&server=acd.com";
char* str = _strdup(raw);
char** parts;
char** keyval;
int cnt = split(str, '&', &parts);
for(int i=0; i<cnt; ++i)
{
split(parts[i], '=', &keyval);
printf("[%d]: %s <--> %s\n", i, keyval[0], keyval[1]);
free(keyval);
}
free(parts);
getchar();
return 0;
}
Output
[0]: subject <--> "some text"
[1]: cc <--> abcd
[2]: server <--> acd.com