I have an input file named datafile.data, which looks something like this:
1,2,1,1,0
1,3,1,1,0
1,1,2,2,1
2,1,2,2,1
2,3,2,3,1
1,1,2,3,2
3,1,1,4,2
2,1,3,2,2
3,3,3,1,2
2,2,3,4,2
Here the first 4 columns stand for 4 attribute values, say A1, A2, A3 and A4, and the final column stands for the class value. This particular sample file has 4 attributes, but other files can have any number 'n' of attributes; in every file, the last column gives the class values.
Now I want to convert this file to another file named as : outputfile.exp
Where the output file's 1st row looks something like below:
<Number of rows in the .data file> <Number of attributes> <Max value of A1> <Max value of A2> <Max value of A3> <Max value of A4> <(Max value of last column)+1>
And the remaining rows of the output file will be same as the data file, with just one change, that is the last column's each value will be incremented by 1.
For an example the output file for the above example will look like:
10 4 3 3 3 4 3
1,2,1,1,1
1,3,1,1,1
1,1,2,2,2
2,1,2,2,2
2,3,2,3,2
1,1,2,3,3
3,1,1,4,3
2,1,3,2,3
3,3,3,1,3
2,2,3,4,3
Where the 1st row's 10 is the number of rows, 4 is the number of attributes present, (3,3,3,4) these 4 are the maximum values of attributes A1,A2,A3 and A4 and last 3 stands for the highest class value +1. And the last column's every value has been incremented by 1 as well.
Below I am attaching my try:
#include <stdio.h>
#include <string.h>
#define MAX_FILE_NAME 100
/*
 * Count the lines of datafile.data, print that count, then echo the file
 * contents to stdout.
 *
 * Returns 0 on success and 1 when the file cannot be opened.
 */
int main()
{
    const char *filename = "datafile.data";
    FILE *fp;
    int count = 0;          // Line counter (result)
    char dataToBeRead[50];
    int c;                  // int, not char: it must be able to hold EOF
    int last = '\n';        // previously read character ('\n' = at a line start)

    // Open the file
    fp = fopen(filename, "r");
    if (fp == NULL)
    {
        printf("Could not open file %s", filename);
        return 1;
    }

    // First pass: count newline characters
    while ((c = getc(fp)) != EOF)
    {
        if (c == '\n')
            count = count + 1;
        last = c;
    }
    // A final line that lacks a terminating newline is still a line.
    if (last != '\n')
        count = count + 1;
    fclose(fp);
    printf("%d\n", count);

    // Second pass: echo the file unchanged
    fp = fopen(filename, "r");
    if (fp == NULL)
    {
        printf("Failed to open.");
        return 1;
    }
    while (fgets(dataToBeRead, sizeof dataToBeRead, fp) != NULL)
    {
        printf("%s", dataToBeRead);
    }
    fclose(fp);
    return 0;
}
And I am getting the below output:
10
1,2,1,1,1
1,3,1,1,1
1,1,2,2,2
2,1,2,2,2
2,3,2,3,2
1,1,2,3,3
3,1,1,4,3
2,1,3,2,3
3,3,3,1,3
2,2,3,4,3
Now I am unable to proceed further, as I am very new to C, please help me out.
Edit 1 : The output format of the example will be:
10 4 3 3 3 4 3
1 2 1 1 1
1 3 1 1 1
1 1 2 2 2
2 1 2 2 2
2 3 2 3 2
1 1 2 3 3
3 1 1 4 3
2 1 3 2 3
3 3 3 1 3
2 2 3 4 3
You really don't want to do this, since rewinding an input stream is an anti-pattern. But you can do something like:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
FILE * xfopen(const char *path, const char *mode);
void * xmalloc(size_t s);
/*
 * Parse one line of comma-separated integers and raise max[i] to the value
 * found in column i whenever that value is larger.
 *
 * buf          - NUL-terminated line; the last field may be followed by a
 *                newline or directly by the terminator (the final line of a
 *                file that lacks a trailing newline)
 * max          - array of column_count running maxima, updated in place
 * column_count - number of fields expected on the line
 *
 * Prints a diagnostic to stderr and exits with status 1 when a field is
 * empty, does not fit in an int, or is followed by an unexpected character.
 */
void
parse_line(const char *buf, int *max, int column_count)
{
    const char *line = buf;   /* kept for diagnostics; buf is advanced below */

    for( int i = 0; i < column_count; i++ ){
        char *end;
        long t = strtol(buf, &end, 10);
        int is_last = (i == column_count - 1);

        /* strtol leaves end == buf when it consumed no digits at all */
        if( end == buf || t < INT_MIN || t > INT_MAX ){
            fprintf(stderr, "invalid number in column %d of %s", i + 1, line);
            exit(1);
        }
        if( t > max[i] ){
            max[i] = (int)t;
        }
        if( !((!is_last && *end == ',')
            || (is_last && (*end == '\n' || *end == '\0')))
        ){
            fprintf(stderr, "invalid input '%c' in %s", *end, line);
            exit(1);
        }
        /* only step over a separator that is known to be a comma */
        if( !is_last ){
            buf = end + 1;
        }
    }
}
/*
 * Convert a comma-separated data file into the "exp" format on stdout.
 *
 * The input is argv[1] when given, otherwise stdin.  The first pass counts
 * rows and columns and records the maximum of every column.  The stream is
 * then rewound, a header line is printed (row count, column count minus one,
 * the maximum of every column but the last, the last column's maximum plus
 * one), and every row is printed again with its last field incremented.
 *
 * Returns 0 on success and 1 on empty input, a failed seek (for example when
 * reading from a pipe) or a row without a comma.
 */
int
main(int argc, char **argv)
{
    const char *path = argc > 1 ? argv[1] : "stdin";
    FILE *in = argc > 1 ? xfopen(path, "r") : stdin;
    char buf[1024];
    int column_count = 1;
    int row_count = 1;
    int *max = NULL;
    int status = 1;   /* pessimistic; set to 0 once everything was written */

    /* Read first line to determine number of columns */
    if( fgets(buf, sizeof buf, in) == NULL ){
        fputs("Input error\n", stderr);
        goto done;
    }
    for( const char *p = buf; *p; p++ ){
        if( *p == ',' ){
            column_count += 1;
        }
    }
    max = xmalloc(column_count * sizeof *max);
    for( int i = 0; i < column_count; i++ ){
        max[i] = INT_MIN;
    }
    parse_line(buf, max, column_count);
    while( fgets(buf, sizeof buf, in) != NULL ){
        row_count += 1;
        parse_line(buf, max, column_count);
    }

    /* Second pass over the same stream */
    if( fseek(in, 0L, SEEK_SET) ){
        perror(path);
        goto done;
    }
    printf("%d %d ", row_count, column_count - 1);
    for( int i = 0; i < column_count - 1; i += 1 ){
        printf("%d ", max[i]);
    }
    printf("%d\n", max[column_count - 1] + 1);
    while( fgets(buf, sizeof buf, in) != NULL ){
        char *comma = strrchr(buf, ',');
        if( comma == NULL ){
            fprintf(stderr, "Invalid input\n");
            goto done;
        }
        /* cut the row at its last comma, then re-emit the last field + 1 */
        *comma = '\0';
        int k = strtol(comma + 1, NULL, 10);
        printf("%s,%d\n", buf, k + 1);
    }
    status = 0;

done:
    free(max);
    /* xfopen may hand back stdin itself (path "-"); never close that */
    if( in != stdin ){
        fclose(in);
    }
    return status;
}
/*
 * fopen() wrapper that never returns NULL.
 *
 * A path of exactly "-" selects a standard stream instead of a file: stdin
 * when mode starts with 'r', stdout otherwise.  When the stream cannot be
 * obtained, the error is reported with perror(path) and the process exits
 * with EXIT_FAILURE.
 */
FILE *
xfopen(const char *path, const char *mode)
{
    FILE *stream;

    if( path[0] == '-' && path[1] == '\0' ){
        stream = (mode[0] == 'r') ? stdin : stdout;
    } else {
        stream = fopen(path, mode);
    }
    if( stream == NULL ){
        perror(path);
        exit(EXIT_FAILURE);
    }
    return stream;
}
/*
 * malloc() wrapper that aborts the program instead of returning NULL:
 * on failure it reports via perror("malloc") and exits with EXIT_FAILURE.
 */
void *
xmalloc(size_t s)
{
    void *block = malloc(s);

    if( !block ){
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    return block;
}
You can execute this as ./a.out < datafile.data > outputfile.exp or ./a.out datafile.data > outputfile.exp, but this will not work if you try to read from a pipe (the seek will fail). The seek failure and the inability to run this as a filter make this a suboptimal approach, but storing the entire file in memory also has drawbacks.
As William Pursell has provided a superb answer in C, here is an awk alternative, although awk is not tagged.
awk -F, -v OFS="," ' # assign input/output field separator to a comma
NR==FNR { # this block is invoked for the 1st read of the input file
for (i = 1; i <= NF; i++) { # loop over the fields
if (max[i] == "" || max[i] < $i) max[i] = $i
# update the max values (an unset max[i] compares equal to "")
}
nr = NR; nf = NF # store #records and #fields
next # skip following statements
}
FNR==1 { # this block is invoked on the 1st line of the 2nd read of the input file
printf("%d %d ", nr, nf - 1) # print #records and #fields - 1
max[nf]++ # increment the max value of the last field
for (i = 1; i <= nf; i++) { # print max values
printf("%d%s", max[i], i==nf ? "\n" : " ");
}
}
{ # this block is invoked for every line of the 2nd read
$nf++ # increment the value of the last field
print # print fields as csv
}
' datafile.data datafile.data # read the input file twice
Below is the modified code. I want to read the .names file first, check whether the last line of that .names file contains a zero, and only then produce the output.
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
FILE * xfopen(const char *path, const char *mode);
void * xmalloc(size_t s);
/*
 * Parse one line of comma-separated integers and raise max[i] to the value
 * found in column i whenever that value is larger.
 *
 * buf          - NUL-terminated line; the last field may be followed by a
 *                newline or directly by the terminator (the final line of a
 *                file that lacks a trailing newline)
 * max          - array of column_count running maxima, updated in place
 * column_count - number of fields expected on the line
 *
 * Prints a diagnostic to stderr and exits with status 1 when a field is
 * empty, does not fit in an int, or is followed by an unexpected character.
 */
void parse_line(const char *buf, int *max, int column_count)
{
    const char *line = buf;   /* kept for diagnostics; buf is advanced below */

    for( int i = 0; i < column_count; i++ ){
        char *end;
        long t = strtol(buf, &end, 10);
        int is_last = (i == column_count - 1);

        /* strtol leaves end == buf when it consumed no digits at all */
        if( end == buf || t < INT_MIN || t > INT_MAX ){
            fprintf(stderr, "invalid number in column %d of %s", i + 1, line);
            exit(1);
        }
        if( t > max[i] ){
            max[i] = (int)t;
        }
        if( !((!is_last && *end == ',') || (is_last && (*end == '\n' || *end == '\0'))) ){
            fprintf(stderr, "invalid input '%c' in %s", *end, line);
            exit(1);
        }
        /* only step over a separator that is known to be a comma */
        if( !is_last ){
            buf = end + 1;
        }
    }
}
/*
 * Convert "<argv[1]>.data" (or stdin when no argument is given) into the
 * space-separated "exp" format on stdout.
 *
 * The first pass counts rows and columns and records the maximum of every
 * column.  The stream is then rewound, a header line is printed (row count,
 * column count minus one, the maximum of every column but the last, the last
 * column's maximum plus one), and every row is printed again with commas
 * turned into spaces and its last field incremented by one.
 *
 * Returns 0 on success and 1 on a too-long file name, empty input, a failed
 * seek or a row without a comma.
 */
int main(int argc, char **argv)
{
    /*
     * The file name is assembled in a buffer of our own: writing the suffix
     * into argv[1] or into a string literal overruns memory we do not own.
     */
    char path[1024] = "stdin";
    if( argc > 1 ){
        int n = snprintf(path, sizeof path, "%s.data", argv[1]);
        if( n < 0 || (size_t)n >= sizeof path ){
            fprintf(stderr, "%s: file name too long\n", argv[1]);
            return 1;
        }
    }
    FILE *in = argc > 1 ? xfopen(path, "r") : stdin;
    char buf[1024];
    int column_count = 1;
    int row_count = 1;
    int *max;
    /* Read first line to determine number of columns */
    if( fgets(buf, sizeof buf, in) == NULL ){
        fputs("Input error\n", stderr);
        return 1;
    }
    for( const char *p = buf; *p; p++ ){
        if( *p == ',' ){
            column_count += 1;
        }
    }
    max = xmalloc(column_count * sizeof *max);
    for( int i = 0; i < column_count; i++ ){
        max[i] = INT_MIN;
    }
    parse_line(buf, max, column_count);
    while( fgets(buf, sizeof buf, in) != NULL ){
        row_count += 1;
        parse_line(buf, max, column_count);
    }
    /* Second pass over the same stream */
    if( fseek(in, 0L, SEEK_SET) ){
        perror(path);
        return 1;
    }
    printf("%d %d ", row_count, column_count - 1);
    for( int i = 0; i < column_count - 1; i += 1 ){
        printf("%d ", max[i]);
    }
    printf("%d\n", max[column_count - 1] + 1);
    while( fgets(buf, sizeof buf, in) != NULL ){
        char *comma = strrchr(buf, ',');
        if( comma == NULL ){
            fprintf(stderr, "Invalid input\n");
            return 1;
        }
        /* cut the row at its last comma, then re-emit the last field + 1 */
        *comma = '\0';
        int k = strtol(comma + 1, NULL, 10);
        for( char *p = buf; *p; p++ ){
            if( *p == ',' ) *p = ' ';
        }
        printf("%s %d\n", buf, k + 1);
    }
    return 0;
}
/*
 * Open path with the given mode, or exit trying.
 *
 * The single-character path "-" stands for a standard stream: stdin if mode
 * begins with 'r', stdout for any other mode.  A NULL stream is reported
 * through perror(path) and ends the process with EXIT_FAILURE.
 */
FILE *
xfopen(const char *path, const char *mode)
{
    const int is_dash = (path[0] == '-' && path[1] == '\0');
    FILE *fp;

    if( !is_dash ){
        fp = fopen(path, mode);
    } else if( *mode == 'r' ){
        fp = stdin;
    } else {
        fp = stdout;
    }

    if( fp == NULL ){
        perror(path);
        exit(EXIT_FAILURE);
    }
    return fp;
}
/*
 * Allocate s bytes with malloc(); a NULL result is reported with
 * perror("malloc") and terminates the process with EXIT_FAILURE, so callers
 * never see a failed allocation.
 */
void *
xmalloc(size_t s)
{
    void *mem;

    mem = malloc(s);
    if( mem == NULL ){
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    return mem;
}
I have to request a first word to compare it to a second word, and replace all occurrences with '*' working character by character without using the <string.h> library.
Exercise:
Write a C program that receives two words entered from the keyboard as input. Consider that each word can contain a maximum of 30 characters. The program must be case sensitive, i.e. it must distinguish lowercase letters from uppercase letters, and it must also be able to handle digits, symbols and punctuation marks. The program must replace each occurrence of the second word in the first word with the '*' character. For example, given the words
abchdfffchdchdtlchd
and
chd
the program should display the word
ab*fff**tl*
#include <stdio.h>
#include <stdlib.h>
#define MAX 30
/*
 * Read one word of at most max_len characters from stdin into dst.
 *
 * Reading stops at a space, a newline, end of input, or once max_len
 * characters are stored; dst is always NUL-terminated, so it must have room
 * for max_len + 1 characters.  Whatever is left on the input line is
 * discarded so that the next read starts on a fresh line.
 *
 * Returns the number of characters stored.
 */
static int read_word (char *dst, int max_len)
{
    int len = 0;
    int ch = getchar ();

    while (ch != EOF && ch != '\n' && ch != ' ' && len < max_len)
    {
        dst[len++] = (char) ch;
        ch = getchar ();
    }
    dst[len] = '\0';

    /* drop the rest of the line (portable stand-in for fflush (stdin)) */
    while (ch != EOF && ch != '\n')
        ch = getchar ();

    return len;
}

/*
 * Replace, in place, every non-overlapping occurrence of word inside text
 * with the single character mark, scanning from left to right.
 * An empty word matches nothing and leaves text untouched.
 */
static void mask_occurrences (char *text, int text_len,
                              const char *word, int word_len, char mark)
{
    int in = 0;    /* next character of text to examine */
    int out = 0;   /* next position to write; never ahead of in */

    if (word_len == 0)
        return;

    while (in < text_len)
    {
        int k = 0;

        while (k < word_len && in + k < text_len && text[in + k] == word[k])
            k++;

        if (k == word_len)
        {
            /* full match: one mark replaces the whole word */
            text[out++] = mark;
            in += word_len;
        }
        else
        {
            text[out++] = text[in++];
        }
    }
    text[out] = '\0';
}

/*
 * Ask for two words of at most MAX characters each and print the first word
 * with every occurrence of the second one replaced by '*'.
 */
int main()
{
    char string1 [MAX+1], string2 [MAX+1], replace = '*';
    int nChar1, nChar2;

    printf ("Enter a word (max 30 characters): ");
    nChar1 = read_word (string1, MAX);
    printf ("\nYou wrote this word: %s\n", string1);
    printf ("The characters are: %d\n", nChar1);

    printf ("\nEnter a word you want to change with '*' in the first string: ");
    nChar2 = read_word (string2, MAX);
    printf ("\nYou wrote this word: %s\n", string2);
    printf ("The characters are: %d\n", nChar2);

    mask_occurrences (string1, nChar1, string2, nChar2, replace);

    printf("\n%s", string1);
    return 0;
}
Decompose the task into several separate functions.
One function will calculate the length of the passed string.
Another function will find a substring in a string.
And the third function will do the replacement of the target substring with a character.
Here is a demonstrative program.
#include <stdio.h>
/* Return the number of characters in s that precede its terminating '\0'. */
size_t length( const char *s )
{
    const char *end = s;

    while ( *end != '\0' )
    {
        ++end;
    }

    return ( size_t )( end - s );
}
/*
 * Locate the first occurrence of s2 inside s1.
 *
 * Returns a pointer to the start of the match within s1, or NULL when s2 is
 * empty, s2 is longer than s1, or no match exists.
 */
char * find_substring( const char *s1, const char *s2 )
{
    const size_t hay_len    = length( s1 );
    const size_t needle_len = length( s2 );

    if ( needle_len == 0 || hay_len < needle_len )
    {
        return NULL;
    }

    /* last index at which a full-length match can still begin */
    const size_t last_start = hay_len - needle_len;

    for ( size_t start = 0; start <= last_start; ++start )
    {
        size_t matched = 0;

        while ( matched < needle_len && s1[start + matched] == s2[matched] )
        {
            ++matched;
        }

        if ( matched == needle_len )
        {
            return ( char * )( s1 + start );
        }
    }

    return NULL;
}
/*
 * Replace, in place, every occurrence of s2 found in s1 (searching left to
 * right, resuming after each match) with the single character c.  The text
 * that follows each match is shifted left to close the gap.
 *
 * Returns s1.
 */
char * replace( char *s1, const char *s2, char c )
{
    const size_t pattern_len = length( s2 );
    char *dst = s1;   /* next position to write */
    char *src = s1;   /* next position to read  */
    char *hit;

    while ( ( hit = find_substring( src, s2 ) ) != NULL )
    {
        if ( dst == src )
        {
            /* nothing has been shifted yet: the text before the match is
               already where it belongs */
            dst = hit;
        }
        else
        {
            while ( src != hit )
            {
                *dst++ = *src++;
            }
        }

        *dst++ = c;
        src = hit + pattern_len;
    }

    /* move the unmatched tail, terminator included, when a gap exists */
    if ( dst != src )
    {
        while ( ( *dst++ = *src++ ) != '\0' )
        {
        }
    }

    return s1;
}
/*
 * Demonstrate replace(): mask every "chd" in a handful of sample strings
 * with '*' and print each result on its own line.
 */
int main(void)
{
    /* writable arrays, because replace() edits its first argument in place */
    char samples[][32] =
    {
        "abc",
        "achd",
        "chda",
        "chd",
        "abchdfffchdchdtlchd"
    };
    const char *pattern = "chd";
    const size_t sample_count = sizeof samples / sizeof samples[0];

    for ( size_t i = 0; i < sample_count; i++ )
    {
        puts( replace( samples[i], pattern, '*' ) );
    }

    return 0;
}
The program output is
abc
a*
*a
*
ab*fff**tl*
This code simply breaks a sentence up into individual words. For example, if you input My name is John, it returns:
My
name
is
John
I'd like to know if there's a better way to write this?
/* Return nonzero when ch ends a word: '.', ',', ' ' or the newline that
 * fgets keeps at the end of the line. */
static int is_separator(char ch) {
    return ch == '.' || ch == ',' || ch == ' ' || ch == '\n';
}

/*
 * Read one line (at most 80 characters) from stdin and print each of its
 * words on a separate line.  Words are runs of characters delimited by '.',
 * ',', ' ' or a newline.  Prints "no solution" when the line has no words.
 */
int main() {
    int w_size = 0;
    bool check_bool = false;    /* true while the scan is inside a word */
    char l_str[81];
    char *ptr_to_word[81];      /* start of every word found in l_str */

    /* The buffer must be filled before it is scanned; treat a failed read
     * as an empty line. */
    if (fgets(l_str, sizeof l_str, stdin) == NULL) {
        l_str[0] = '\0';
    }

    for (char *res_p = l_str; *res_p != '\0'; res_p++) {
        if (is_separator(*res_p)) {
            check_bool = false;
        } else if (!check_bool) {
            /* first character after a separator: a new word starts here */
            ptr_to_word[w_size] = res_p;
            w_size++;
            check_bool = true;
        }
    }

    if (w_size == 0) {
        printf("no solution");
    } else {
        for (int i = 0; i < w_size; i++) {
            char *a = ptr_to_word[i];
            while ((*a != '\0') && !is_separator(*a)) {
                printf("%c", *a);
                a++;
            }
            printf("\n");
        }
    }
    return 0;
}
the following proposed code:
prompts the user for the sentence to be divided into words
cleanly compiles
performs the desired functionality
And now, the proposed code: (EDIT per chqrlie)
#include <stdio.h>
#include <string.h>
#define MAX_BUF_LEN 1024
#define MAX_WORDS 100
/*
 * Prompt for a sentence, split it into words and print the words numbered
 * from 1.  Words are separated by ',', '.', ' ' or a newline; at most
 * MAX_WORDS words are kept.
 */
int main( void )
{
    /* The newline is a delimiter too: fgets keeps it at the end of the line,
     * so without it the last word would carry a trailing '\n' and an empty
     * line would be reported as a word. */
    static const char delimiters[] = ",. \n";

    char buffer[ MAX_BUF_LEN ] = {0};
    char *words[ MAX_WORDS ] = {NULL};

    printf( "%s\n", "Please enter a sentence to be divided into words" );
    if( fgets( buffer, sizeof( buffer ), stdin ) )
    {
        size_t wordCount = 0;
        char *token;

        /* strtok cuts buffer in place; words[] points into buffer */
        token = strtok( buffer, delimiters );
        while( wordCount < MAX_WORDS && token )
        {
            words[ wordCount ] = token;
            wordCount++;
            token = strtok( NULL, delimiters );
        }

        for( size_t i = 0; i < wordCount; i++ )
        {
            printf( "%zu: %s\n\n", i+1, words[i] );
        }
    }
    return 0;
}
Here is the results of a typical run of the proposed code:
Please enter a sentence to be divided into words
This is a sentence to be divided into words
1: This
2: is
3: a
4: sentence
5: to
6: be
7: divided
8: into
9: words
If you don't need to store the words in an array, you can output them directly:
#include <stdio.h>
#include <string.h>
/*
 * Prompt for a line of text and print its words, numbered from 1, one per
 * line.  Words are the runs of characters between ',', '.', newline and
 * space; nothing is printed when no line can be read.
 */
int main() {
    static const char separators[] = ",.\n ";
    char line[81];

    printf("Enter string: ");
    if (fgets(line, sizeof line, stdin) == NULL) {
        return 0;
    }

    /* step over leading separators so the cursor rests on the first word */
    const char *cursor = line + strspn(line, separators);

    for (int number = 1; *cursor != '\0'; number++) {
        /* the word runs up to the next separator or the end of the string */
        int word_len = (int)strcspn(cursor, separators);

        printf("%d: %.*s\n", number, word_len, cursor);

        cursor += word_len;
        cursor += strspn(cursor, separators);
    }
    return 0;
}