Erase last members of line from text file - c

I have a text file as data.txt and I want to delete the last members of each line:
Here's the text file:
2031,2,0,0,0,0,0,0,54,0,
2027,2,0,0,0,0,0,0,209,0,
2029,2,0,0,0,0,0,0,65,0,
2036,2,0,0,0,0,0,0,165,0,
I would like to delete so it becomes:
2031,2,0,0,0,0,0,0,
2027,2,0,0,0,0,0,0,
2029,2,0,0,0,0,0,0,
2036,2,0,0,0,0,0,0,
I'm working in C but as the numbers can have two or three digits, I'm not sure how to do this.

A couple of uses of strrchr() can do the job:
#include <string.h>
void zap_last_field(char *line)
{
char *last_comma = strrchr(line, ',');
if (last_comma != 0)
{
*last_comma = '\0';
last_comma = strrchr(line, ',');
if (last_comma != 0)
*(last_comma + 1) = '\0';
}
}
Compiled code that seems to work. Note that given a string containing a single comma, it will zap that comma. If you don't want that to happen, then you have to work a little harder.
Test code for zap_last_field()
#include <string.h>
extern void zap_last_field(char *line);
void zap_last_field(char *line)
{
char *last_comma = strrchr(line, ',');
if (last_comma != 0)
{
*last_comma = '\0';
last_comma = strrchr(line, ',');
if (last_comma != 0)
*(last_comma + 1) = '\0';
}
}
#include <stdio.h>
#include <stdlib.h>
int main(void)
{
char *line = malloc(4096);
if (line != 0)
{
while (fgets(line, 4096, stdin) != 0)
{
printf("Line: %s", line);
zap_last_field(line);
printf("Zap1: %s\n", line);
}
free(line);
}
return(0);
}
This has been vetted with valgrind and is OK on both the original data file and the mangled data file listed below. The dynamic memory allocation is there to give valgrind the maximum chance of spotting any problems.
I strongly suspect that the core dump reported in a comment happens because the alternative test code tried to pass a literal string to the function, which won't work because literal strings are not generally modifiable and this code modifies the string in situ.
Test code for zap_last_n_fields()
If you want to zap the last couple of fields (a controlled number of fields), then you'll probably want to pass in a count of the number of fields to be zapped and add a loop. Note that this code uses a VLA so it requires a C99 compiler.
#include <string.h>
extern void zap_last_n_fields(char *line, size_t nfields);
void zap_last_n_fields(char *line, size_t nfields)
{
char *zapped[nfields+1];
for (size_t i = 0; i <= nfields; i++)
{
char *last_comma = strrchr(line, ',');
if (last_comma != 0)
{
zapped[i] = last_comma;
*last_comma = '\0';
}
else
{
/* Undo the damage wrought above */
for (size_t j = 0; j < i; j++)
*zapped[j] = ',';
return;
}
}
zapped[nfields][0] = ',';
zapped[nfields][1] = '\0';
}
#include <stdio.h>
int main(void)
{
char line1[4096];
while (fgets(line1, sizeof(line1), stdin) != 0)
{
printf("Line: %s", line1);
char line2[4096];
for (size_t i = 1; i <= 3; i++)
{
strcpy(line2, line1);
zap_last_n_fields(line2, i);
printf("Zap%zd: %s\n", i, line2);
}
}
return(0);
}
Example run — using your data.txt as input:
Line: 2031,2,0,0,0,0,0,0,54,0,
Zap1: 2031,2,0,0,0,0,0,0,54,
Zap2: 2031,2,0,0,0,0,0,0,
Zap3: 2031,2,0,0,0,0,0,
Line: 2027,2,0,0,0,0,0,0,209,0,
Zap1: 2027,2,0,0,0,0,0,0,209,
Zap2: 2027,2,0,0,0,0,0,0,
Zap3: 2027,2,0,0,0,0,0,
Line: 2029,2,0,0,0,0,0,0,65,0,
Zap1: 2029,2,0,0,0,0,0,0,65,
Zap2: 2029,2,0,0,0,0,0,0,
Zap3: 2029,2,0,0,0,0,0,
Line: 2036,2,0,0,0,0,0,0,165,0,
Zap1: 2036,2,0,0,0,0,0,0,165,
Zap2: 2036,2,0,0,0,0,0,0,
Zap3: 2036,2,0,0,0,0,0,
It also correctly handles a file such as:
2031,0,0,
2031,0,
2031,
2031
,

Related

Longest word in file

My program needs to print longest word which contains only letters from a file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
int checkString(const char s[]) {
unsigned char c;
while ((c = *s) && (isalpha(c) || isblank(c)))
++s;
return *s == '\0';
}
int main() {
char file_name[]="document.txt";
FILE *fp = fopen(file_name, "r");
char *largest = str;
int largest_len = 0;
while (fgets(file_name, 1000, fp) != NULL) {
char *temp = strtok(file_name, " ");
while (temp != NULL) {
if (strlen(temp) > largest_len) {
strcpy(largest, temp);
largest_len = strlen(largest);
}
temp = strtok(NULL, "\",.,1,2,4,5,6,7,8,9 ");
}
}
if(checkString(largest))
printf("%s", largest);
fclose(fp);
return 0;
}
In my code, if the largest word contains only letters it will be printed. How to modify this code to check next words if the largest doesn't contain only letters?
First of all, you cannot store the pointer to longest word like that. You re-use str for the next line and so the pointer is not likely to point to something useful.
Second, while strtok() appears simple, initially, I tend to apply a straightforward approach to a straightforward problem.
The problem is O(n) (where n is the length of the document). You just need to go through it character by character. Of course, since every line is ended by a \n, you can use the line based approach in this case.
So, instead of strtok, simply check each character, if it is a legal word character (an alphanumeric character, that is). You can easily do so with the standard library function isalpha() from header ctype.h.
Below is the program, copying the longest string into a dedicated buffer, using isalpha() and doing the line based reading of the file, just like the code in the original question did.
Of course, this code assumes, no line is ever longer than 999 characters.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <ctype.h>
static size_t gulp(const char* line, size_t istart, size_t len) {
size_t n = 0;
for (size_t i = istart; i < len; i++, n++) {
if (!isalpha(line[i])) {
break;
}
}
return n;
}
int main(int argc, const char * argv[]) {
FILE* f = fopen("document.txt","r");
char line[1000];
char longest_word[1000];
size_t longest_word_length = 0;
while (fgets(line, sizeof(line), f) != NULL) {
size_t i0 = 0;
size_t line_length = strlen(line);
while (i0 < line_length) {
if (isalpha(line[i0])) {
size_t n = gulp(line, i0, line_length);
if (n > longest_word_length) {
strncpy(longest_word, &line[i0], n);
longest_word[n] = '\0';
longest_word_length = n;
}
i0 = i0 + n;
} else {
i0++;
}
}
}
fclose(f);
f = NULL;
if (longest_word_length > 0) {
printf("longest word: %s (%lu characters)\n",
longest_word, longest_word_length);
}
return 0;
}
There are a number of problems here:
you use the same buffer (str) for two different uses: as a read buffer and to store the longest word. If you find the largest word in the first line, the word will be erased when reading the second line. Furthemore, if you find a rather long word at the beginning of a line, the strings pointed to by largest and temp could overlap which leads to undefined behaviour => use a different array or strdup (and free) for largest
you only use the space as possible separator. You should wonder whether you should add tab and/or punctuations
once you have got a word you should ensure that it only contains valid letters before testing its length and ignore it if for example it contains digits.
if a single line can be longer than 1000 characters, you should wrap the end of the current part before the beginning of the next one for the possible case where a long word would be splitted there.
For additional corner case processing, you should specify what to do if a word contains illegal characters but only at one side. For example if . is not used as a word delimiter, a word with an embedded . like "a.b" should be ignored, but a terminating . should only be stripped (like "example." should become "example"
I think the order you do things should be a bit different, here is an example
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
int isCandidate(char* word);
int main(int argc, char* argv[])
{
if (--argc == 0)
{
perror("not enough command line arguments, expecting a filename");
return -1;
}
++argv;
FILE* fp = fopen(*argv, "r");
if (fp == NULL)
{
perror(*argv);
return -1;
}
// get size of file
fseek(fp, 0L, SEEK_END);
long fileLength = ftell(fp);
if (fileLength < 1)
{
perror("file is empty");
return -1;
}
fseek(fp, 0L, SEEK_SET); // position file pointer at the beginning again
// allocate space for the whole file and then read it in
// for a text file it should be OK to do so since they
// normally are not that large.
char* buffer = malloc(fileLength+1);
if (fread(buffer, 1, fileLength, fp) != 0)
{
buffer[fileLength] = '\0'; // make sure the buffer ends with \0
}
else
{
perror("Failed reading into buffer");
return -1;
}
fclose(fp); // we are done with the file
const char filter[] = " \n\r";
char* longestWord = malloc(fileLength+1); // max length in theory
long unsigned int maxLength = 0;
for (char* token = strtok(buffer, filter); token != NULL; token = strtok(NULL, filter))
{
if (isCandidate(token))
{
if (strlen(token) > maxLength)
{
strcpy(longestWord, token);
maxLength = strlen(token);
}
}
}
printf("Longest word:'%s', len=%lu\n", longestWord, maxLength);
free(longestWord);
free(buffer);
}
int isCandidate(char* word)
{
if (word == NULL)
{
perror("invalid argument to isCandidate");
return 0;
}
for (char* ch = word; *ch; ++ch)
{
if (!isalpha(*ch)) return 0;
}
return 1;
}

How to Tokenize String without using strtok()

I'm trying to tokenize a string without using a strtok().
When I run characters of string, it will print in each line.
For instance, when I run:
printfTokens("Hello from other side!");
The output should be:
Hello
from
other
side!
As I'm just learning C, I'm stuck for hours on how to implement this program. So far, I only know the basics and playing around with not (still haven't learned any calloc, malloc, etc).
So far I have this code, but the output does not print anything.
#include <stdio.h>
#include <string.h>
#define MAX_WORD 100
void printfTokens(char *inputString) {
int i;
/*int inputStringLength;
for(i = 0; inputString[i] != '/0'; i++) {
inputStringLength++;
}*/
while(inputString[i] != '\0') {
char testing[MAX_WORD];
while(inputString[i] != ' ') {
testing[inputString[i]]++;
i++;
}
printf("%s", testing);
i++;
}
}
int main() {
printfTokens("TESTING ONE! TWO! THREE!");
return 0;
}
You do not initialize the variable i.
while(inputString[i] != '\0') can be written while(inputString[i]).
testing[inputString[i]]++ makes sense to count the number of occurrences of a given character from inputString, but it does not make sense to print it. You may want to do something like:
while(1)
{
char testing[MAX_WORD], *t=testing;
while(inputString[i]&&(inputString[i]!=' '))
*t++=inputString[i++];
if (t>testing) printf("%s", testing);
if (!inputString[i]) break;
i++;
}
It would be better to name MAX_WORD_LENGTH instead of MAX_WORD.
These are a few problems in your code.
Sample tokenization function.
size_t tokenize(const char *inputString, const char *delim, char **argv, size_t maxtokens)
{
size_t ntokens = 0;
char *tokenized = strdup(inputString);
if(tokenized)
{
argv[0] = tokenized;
while(*tokenized)
{
if(strchr(delim, *tokenized))
{
*tokenized = 0;
ntokens++;
if(ntokens == maxtokens - 1)
{
break;
}
argv[ntokens] = tokenized + 1;
}
tokenized++;
}
}
return ntokens + 1;
}
int main()
{
char *tokens[10];
size_t ntokens = tokenize("TESTING ONE! TWO! THREE!", " ", tokens , 10);
for(size_t i = 0; i < ntokens; i++)
{
printf("Token[%zu] = `%s`\n", i, tokens[i]);
}
free(tokens[0]);
return 0;
}
https://godbolt.org/z/znv8PszG6

How to split with multiple delimiters in C

I have this line of text:
32+-#3#2-#3#3
I need to separate numbers from each other. So basically the result would be like this:
3
2+-
3
2-
3
3
This is my code but it's not working properly because I have numbers with two digits:
#include <stdio.h>
#include <string.h>
int main(void) {
char string[50] = "32-#3#2-#3#3";
// Extract the first token
char *token = strtok(string, "#");
// loop through the string to extract all other tokens
while (token != NULL) {
printf(" %s\n", token); //printing each token
token = strtok(NULL, "#");
}
return 0;
}
You can't do it with strtok (alone), because there is no delimiter between the numbers you want to split. It's easier without strtok, just print what you want printed and add a separator unless a character which belongs to the token follows:
#include <stdio.h>
int main()
{
char string[] = "32+-#3#2-#3#3";
for (char *token = string; *token; ++token)
if ('0'<=*token && *token<='9' || *token=='+' || *token=='-')
{
putchar(*token);
if (token[1]!='+' && token[1]!='-') putchar('\n');
}
}
If you consider this too easy, you can use a regular expression to match the tokens:
#include <stdio.h>
#include <regex.h>
int main()
{
char *string = "32+-#3#2-#3#3";
regex_t reg;
regcomp(&reg, "[0-9][+-]*", 0);
regmatch_t match = {0};
while (regexec(&reg, string+=match.rm_eo, 1, &match, 0) == 0)
printf("%.*s\n", (int)(match.rm_eo-match.rm_so), string+match.rm_so);
}
There is a simple way to achieve this, but in C is a bit more complicated since we don't have vector as in C++ but I can suggest a pure C implementation which can be improved:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void split_ss(const char* src,const char* pattern, char** outvec, size_t* outsize)
{
const size_t pat_len = strlen(pattern);
char* begin = (char*) src;
const char* next = begin;
if ((begin = strstr((const char*)begin, pattern)) != 0x00) {
unsigned int size = begin - next;
*outvec = malloc(sizeof(char) * size);
memcpy(*outvec , next, size);
outvec++;
(*outsize)+=1;
split_ss(begin+pat_len, pattern, outvec, outsize);
} else {
unsigned int size = &src[strlen(src)-1] - next + 1;
*outvec = malloc(sizeof(char) * size);
memcpy(*outvec, next, size);
(*outsize) += 1;
}
}
int main()
{
char* outdata[64] = {0};
size_t size, i=0;
split_ss("32+-#3#2-#3#3", "#", outdata, &size);
for(i=0; i < size; i++) {
printf("[%s]\r\n", outdata[i]);
}
// make sure to free it
return 0;
}
strstr is used to split by string rather than a character. Also output is a poorman 2D array with out size to iterate it and don't forget to free it.
strtok() is not the right tool for you purpose... As a matter of fact strtok() is rarely the right tool for any purpose because of its tricky semantics and side effects.
A simple loop will do:
#include <stdio.h>
int main(void) {
char string[50] = "32+-#3#2-#3#3";
for (char *p = string; *p; p++) {
if (*p == '#')
continue;
putchar(*p);
while (p[1] == '+' || p[1] == '-')
putchar(*++p);
putchar('\n');
}
return 0;
}

How to make directories for an specific user using C, CGI, HTML?

and i have a issue, im creating directories using C, CGI and HTML, i started with 2 source codes, one is this:
/*
* newdir.c - Create a directory
*/
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
int main(int argc, char *argv[])
{
printf("\n#Argumentos: %d\n", argc);
if(mkdir(argv[1], 0777)) {
perror("mkdir");
exit(EXIT_FAILURE);
}
exit(EXIT_SUCCESS);
}
And i have this other one:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Everything here is the same as sampleprogram.c except that
// there are differences when we have to read in the string
int main(void)
{
char *data;
char* s = malloc(100 * sizeof(char));
printf("%s%c%c\n",
"Content-Type:text/html;charset=iso-8859-1",13,10);
int length = atoi(getenv("CONTENT_LENGTH"));
int j =0;
char c;
c = getchar();
data = malloc(sizeof(char) * (length + 1) );
while ( c != EOF && j < length ) {
//read in one character
data[j] = c;
c = getchar();
j++;
}
data[j] = '\0';
printf("<TITLE>Make directorys</TITLE>");
printf("<H3> Make new directory \n <br> %s ");
if(data == NULL) {
printf("<P>Error! Error in passing data from form to script.");
return 0;
}
char delimiters[] = "&";
char* str = strtok( data, delimiters);
sscanf(str, "s=%s", s);
int i;
for (i=0; i < strlen(s); i++ ) {
if (s[i] == '+')
s[i] = ' ';
}
str = strtok(NULL, delimiters);
//s is the name collected from the text area in the html file
if(mkdir(s, 0777)) {
perror("mkdir");
exit(EXIT_FAILURE);
}
exit(EXIT_SUCCESS);
return 0;
}
so what I did was add the first code to the second other, the other code collects information from a text field of a html and this is processed by the CGI script.
It works perfect , but the directory is created for the user " daemon " and I can not add anything inside if I do not set the permissions to 777 again from console, why this happens ? something is wrong?
PD: if i run the first code from console works perfect, i'm the user :(
You have no way to really solve this, because the user that executes the scripts/programs is the daemon user. It's the way you setup your web server, the web server itself is run by the daemon user, and that means that every script/program invoked by the web server will be run by the same user too.
Also, both c programs you posted are really poorly coded, and it doesn't seem like you need to write them in the c language.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(void)
{
const char *content_length;
char *data;
char s[100];
content_length = getenv("CONTENT_LENGTH");
if (content_length != NULL)
{
int length;
int j;
char c;
printf("Content-Type: text/html;charset=iso-8859-1\r\n\n");
length = atoi(content_length);
c = getchar();
data = malloc(length + 1);
if (data != NULL)
{
const char *delimiters;
char *token;
delimiters = "&";
while ((c != EOF) && (j < length))
{
data[j] = c;
c = getchar();
j++;
}
data[j] = '\0';
printf("<TITLE>Make directorys</TITLE>");
printf("<H3> Make new directory \n <br> %s ");
token = strtok(data, delimiters);
if ((token != NULL) && (sscanf(str, "s=%99s", s) == 1))
{
int i;
for (i = 0 ; s[i] != '\0' ; i++ )
{
if (s[i] == '+')
s[i] = ' ';
}
if (mkdir(s, 0777))
{
perror("mkdir");
exit(EXIT_FAILURE);
}
}
free(data);
}
}
return 0;
}

Split function in C runtime error

I get a runtime error when running a C program,
Here is the C source (parsing.h header code a little lower):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parsing.h"
int main()
{
printf("Enter text seperated by single spaces :\n");
char *a = malloc(sizeof(char)*10);
gets(a);
char **aa = Split(a, ' ');
int k = SplitLen(a, ' ');
int i = 0;
for(;i<k;i++)
{
printf("%s\n", aa[i]);
}
free(a);
free(aa);
return 0;
}
and the parsing.h file:
#include <string.h>
#include <stdlib.h>
#include <malloc.h>
#include <assert.h>
char** Split(char* a_str, const char a_delim)
{
char** result = 0;
int count = 0;
char* tmp = a_str;
char* last_comma = 0;
/* Count how many elements will be extracted. */
while (*tmp)
{
if (a_delim == *tmp)
{
count++;
last_comma = tmp;
}
tmp++;
}
/* Add space for trailing token. */
count += last_comma < (a_str + strlen(a_str) - 1);
/* Add space for terminating null string so caller
knows where the list of returned strings ends. */
count++;
result = malloc(sizeof(char*) * count);
if (result)
{
size_t idx = 0;
char* token = strtok(a_str, ",");
while (token)
{
assert(idx < count);
*(result + idx++) = strdup(token);
token = strtok(0, ",");
}
assert(idx == count - 1);
*(result + idx) = 0;
}
return result;
}
int SplitLen(char *src, char sep)
{
int result = 0;
int i;
for(i = 0; i<strlen(src); i++)
{
if(src[i] == sep)
{
result += 1;
}
}
return result;
}
I'm sure most of the code is unneeded but I posted the whole lot in case there is some relevance, Here is the runtime error:
a.out: parsing.h:69: Split: Assertion `idx == count - 1' failed.
Aborted
Thanks in advance and for info I didn't program the whole lot but took some pieces from some places but most is my programming Thanks!.
The purpose of the assert function is that is will stop your program if the condition passed as an argument is false. What this tells you is that when you ran your program, idx != count - 1 at line 69. I didn't take the time to check what import that has on the execution of your program, but apparently (?) idx was intended to equal count - 1 there.
Does that help?
There are many problems. I'm ignoring the code split into two files; I'm treating it as a single file (see comments to question).
Do not use gets(). Never use gets(). Do not ever use gets(). I said it three times; it must be true. Note that gets() is no longer a Standard C function (it was removed from the C11 standard — ISO/IEC 9899:2011) because it cannot be used safely. Use fgets() or another safe function instead.
You don't need to use dynamic memory allocation for a string of 10 characters; use a local variable (it is simpler).
You need a bigger string — think about 4096.
You don't check whether you got any data; always check input function calls.
You don't free all the substrings at the end of main(), thus leaking memory.
One major problem the Split() code slices and dices the input string so that SplitLen() cannot give you the same answer that Split() does for the number of fields. The strtok() function is destructive. It also treats multiple adjacent delimiters as a single delimiter. Your code won't account for the difference.
Another major problem is that you analyze the strings based on the delimiter passed into the Split() function, but you use strtok(..., ',') to actually split on commas. This is more consistent with the commentary and names, but totally misleading to you. This is why your assertion fired.
You don't need to include <malloc.h> unless you are using the extra facilities it provides. You aren't, so you should not include it; <stdlib.h> declares malloc() and free() perfectly well.
This code works for me; I've annotated most of the places I made changes.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static int altSplitLen(char **array);
static char **Split(char *a_str, const char a_delim);
static int SplitLen(char *src, char sep);
int main(void)
{
printf("Enter text separated by single spaces:\n");
char a[4096]; // Simpler
if (fgets(a, sizeof(a), stdin) != 0) // Error checked!
{
char **aa = Split(a, ' ');
int k = SplitLen(a, ' ');
printf("SplitLen() says %d; altSplitLen() says %d\n", k, altSplitLen(aa));
for (int i = 0; i < k; i++)
{
printf("%s\n", aa[i]);
}
/* Workaround for broken SplitLen() */
{
puts("Loop to null pointer:");
char **data = aa;
while (*data != 0)
printf("[%s]\n", *data++);
}
{
// Fix for major leak!
char **data = aa;
while (*data != 0)
free(*data++);
}
free(aa); // Major leak!
}
return 0;
}
char **Split(char *a_str, const char a_delim)
{
char **result = 0;
size_t count = 0;
char *tmp = a_str;
char *last_comma = 0;
/* Count how many elements will be extracted. */
while (*tmp)
{
if (a_delim == *tmp)
{
count++;
last_comma = tmp;
}
tmp++;
}
/* Add space for trailing token. */
count += last_comma < (a_str + strlen(a_str) - 1);
/* Add space for terminating null string so caller
knows where the list of returned strings ends. */
count++;
result = malloc(sizeof(char *) * count);
if (result)
{
char delim[2] = { a_delim, '\0' }; // Fix for inconsistent splitting
size_t idx = 0;
char *token = strtok(a_str, delim);
while (token)
{
assert(idx < count);
*(result + idx++) = strdup(token);
token = strtok(0, delim);
}
assert(idx == count - 1);
*(result + idx) = 0;
}
return result;
}
int SplitLen(char *src, char sep)
{
int result = 0;
for (size_t i = 0; i < strlen(src); i++)
{
if (src[i] == sep)
{
result += 1;
}
}
return result;
}
static int altSplitLen(char **array)
{
int i = 0;
while (*array++ != 0)
i++;
return i;
}
Sample run:
$ parsing
Enter text separated by single spaces:
a b c d e f gg hhh iii jjjj exculpatory evidence
SplitLen() says 0; altSplitLen() says 12
Loop to null pointer:
[a]
[b]
[c]
[d]
[e]
[f]
[gg]
[hhh]
[iii]
[jjjj]
[exculpatory]
[evidence
]
$
Note that fgets() keeps the newline and gets() does not, so the newline was included in output. Note also how the printf() printing the data showed the limits of the strings; that is enormously helpful on many occasions.

Resources