Here is my problem statement:
I have a small question related to a portion of code which I can't find a proper solution to. Again I'm not necessarily asking for a full solution I just hit a deadend. I need to read from a file lines (Don't know their lengths) find the maximum length of a line and add spaces between words on each line, evenly, so that they are fully justified (all lines have the same size as the max one).
Here is my code so far:
#include <stdio.h>
#include <stdlib.h>
int main() {
FILE *f;
char *word;
int j, i, m, n, c, k, z;
char aux[255] = "", aux1[255];
i = 0;
j = 0;
char file[100][100];
char s[100];
f = fopen("user_input.txt", "r");
m = 0;
while (fgets(file[i], sizeof(file[i]), f)) {
if (m < strlen(file[i]) - 1)
m = strlen(file[i]) - 1;
i++;
}
for (j = 0; j < i; j++) {
n = 0;
for (k = 0; k < strlen(file[j]); k++)
if (file[j][k] == ' ')
n++;
c = (m - strlen(file[j])) / n;
for (z = 0; z < c; z++)
aux[z] = ' ';
for (k = 0; k < strlen(file[j]); k++)
if (file[j][k] == ' ') {
strcpy(aux1, file[j] + k + 1);
strcat(file[j], aux);
strcat(file[j], aux1);
}
printf("%s", file[j]);
}
}
Your code is broken for multiple reasons:
you forgot to include <string.h>
you have hard coded limits to the maximum line length and the number of lines, both causing a penalty of 0.5p
you do not test for fopen() success, causing undefined behavior upon failure to open the file.
you do not test array boundaries when reading lines, causing undefined behavior if the file has more than 100 lines or 99 byte fragments thereof.
your computation of c = (m - strlen(file[j])) / n; is rounded down. You will not insert enough spaces for full text justification in many cases.
aux is not properly null terminated, it will keep growing up to the largest number of spaces to insert for any given line.
the insertion operation will corrupt the line and ultimately leave just the last word with some spaces inserted before.
the code was badly formated and since you do not use {} for non trivial statement, it is difficult to read and easy to break.
Here is a modified version that does not have such limitations:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main() {
const char *filename = "user_input.txt";
FILE *f;
char *line;
int c, i, len, maxlen, skip, nw, spaces, ns;
/* open the file */
if ((f = fopen(filename, "r")) == NULL) {
fprintf(stderr, "cannot open %s\n", filename);
return 1;
}
/* first pass: determine the maximum line length */
for (maxlen = len = 0;;) {
c = getc(f);
if (c == '\n' || c == EOF) {
if (maxlen < len)
maxlen = len;
len = 0;
if (c == EOF)
break;
} else {
len++;
}
}
/* allocate the line buffer: maxlen characters plus newline plus '\0' */
if ((line = malloc(maxlen + 2)) == NULL) {
fprintf(stderr, "cannot allocate memory for %d bytes\n", maxlen + 2);
fclose(f);
return 1;
}
/* second pass: read one line at a time */
rewind(f);
while (fgets(line, maxlen + 2, f)) {
len = strlen(line);
if (len > 0 && line[len - 1] == '\n') {
/* strip the newline if any */
line[--len] = '\0';
}
/* skip and output initial spaces */
for (skip = 0; line[skip] == ' '; skip++) {
putchar(line[skip]);
}
/* count the words */
for (nw = 0, i = skip; i < len; i++) {
if (line[i] == ' ')
nw++;
}
/* output the text, expanding spaces */
spaces = maxlen - len;
for (i = skip; i < len; i++) {
if (line[i] == ' ') {
ns = spaces / nw;
printf("%*s", ns, "");
spaces -= ns;
nw--;
}
putchar(line[i]);
}
putchar('\n');
}
free(line);
fclose(f);
return 0;
}
Related
the code:
#include <ctype.h>
#include <stdio.h>
#include <string.h>
char filename[] = "11.txt";
char filename1[] = "2.txt";
FILE *ptr, *resultptr;
char string[100];
char words[100][100];
int len = sizeof(filename) / sizeof(char);
int i = 0, j = 0, k, length, count;
int main()
{
fopen_s(&ptr, filename, "r");
fopen_s(&resultptr, filename1, "w");
if ((ptr == nullptr) || (resultptr == nullptr)) {
printf("Files were not opened!");
return -1;
}
while (fgets(string, sizeof string, ptr)) {
for (k = 0; string[k] != '\0'; k++) {
if (string[k] != ' ' && string[k] != '\n') {
words[i][j++] = tolower(string[k]);
} else {
words[i][j] = '\0';
i++;
j = 0;
}
}
length = i + !!j;
fputs("Occurrences of each word:\n", resultptr); //prints this sentence into file
for (i = 0; i < length; i++) {
if (strcmp(words[i], "0") == 0)
continue;
count = 1;
char *ch = words[i];
for (j = i + 1; j < length; j++) {
if (strcmp(words[i], words[j]) == 0 && (strcmp(words[j], "0") != 0)) {
count++;
strcpy_s(words[j], "0");
}
}
fputs("The word ", resultptr);
if (string[i] != ' ' && string[i] != '\n') {
fprintf(resultptr, "%s", ch);
}
fputs(" occurred ", resultptr);
fprintf(resultptr, "%d", count);
fputs(" times\n", resultptr);
}
fclose(ptr);
fclose(resultptr);
return 0;
}
}
The counting part is working perfectly fine, but the problem is when I try to print results, for the sentence "to be or not: to be that is the question ..." it prints this:
Occurrences of each word:
The word to occurred 2 times
The word be occurred 2 times
The word occurred 1 times
The word not: occurred 1 times
The word that occurred 1 times
The word is occurred 1 times
The word occurred 1 times
Occurrences of each word:
The word to occurred 1 times
The word be occurred 1 times
The word or occurred 1 times
The word occurred 1 times
The word that occurred 1 times
The word is occurred 1 times
The word occurred 2 times
The word question occurred 1 times
The word ... occurred 1 times
What's messing? like I'm not professional, but can someone guide me on what's wrong here? I changed a bit from the original one but still a lot of mistakes
There are multiple problems in the code:
the global variables should be moved inside the body of the main() function.
fopen_s() is not portable, use fopen() instead.
strcpy_s() is not portable, use strcpy() instead or just set the first byte if the string to '\0' to make it an empty string.
i and j should be reset to 0 after each fgets().
you should test for letters with isalpha() instead of only testing for space and newline.
you should clear the duplicated words by setting them to the empty string.
you should use a simple fprintf() call for the output line.
you should not close the files inside the while(fgets(...)) loop.
If you want to count all words in the file, this approach is limited to a rather small number of words. A more general solution would construct a dictionary of words found as you read the file contents and increment the count for each word found.
Here is a modified version:
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning(disable:4996) // disable Microsoft obnoxious warning
#endif
#define WORDS 2000
#define CHARS 40
int main() {
char filename[] = "11.txt";
char filename1[] = "2.txt";
FILE *ptr, *resultptr;
char string[100];
char words[WORDS][CHARS];
int i, j, k, length, count;
ptr = fopen(filename, "r");
if (ptr == NULL) {
fprintf(stderr, "cannot open %s: %s\n", filename, strerror(errno));
return 1;
}
resultptr = fopen(filename1, "w");
if (resultptr == NULL) {
fprintf(stderr, "cannot open %s: %s\n", filename1, strerror(errno));
return 1;
}
i = j = 0;
while (i < WORDS && fgets(string, sizeof string, ptr)) {
for (k = 0; string[k] != '\0'; k++) {
unsigned char c = string[k];
if (isalpha(c)) {
if (j < CHARS - 1)
words[i][j++] = tolower(c);
} else {
words[i][j] = '\0';
if (j > 0) {
j = 0;
i++;
if (i == WORDS)
break;
}
}
}
if (j > 0) {
// include the last word if the file does not end with a newline
words[i][j] = '\0';
i++;
}
}
length = i;
fprintf(resultptr, "Occurrences of each word:\n");
for (i = 0; i < length; i++) {
if (words[i][0] == '\0')
continue;
count = 1;
for (j = i + 1; j < length; j++) {
if (strcmp(words[i], words[j]) == 0) {
count++;
words[j][0] = '\0';
}
}
fprintf(resultptr, "The word %s occurred %d times\n", words[i], count);
}
fclose(ptr);
fclose(resultptr);
return 0;
}
Note: Meanwhile, OP has applied the mentioned fixes to the question, thereby invalidating this answer. This answer applies to revision 4 of the question.
What's messing?
The word 0 occurred 1 times - You chose to replace word duplicates with the string "0". In order to not count those replacements as words, insert
if (strcmp(words[i], "0") == 0) continue;
at the very beginning of the printing for loop body. It seems you intended if (string[i] != ' ' && string[i] != '\0' && string[i]!='0' ) to do this, but that doesn't work - remove this code.
Besides, the empty string would be a better choice, allowing the word 0.
The word
occurred 1 times - The '\n' at the end was counted as a word. In order to not count this and in addition skip punctuation as well as avoid empty words due to consecutive non-word characters, replace
if (string[k] != ' ' && string[k] != '\0') {
words[i][j++] = tolower(string[k]);
}
else
with
if (isalnum(string[k]))
words[i][j++] = tolower(string[k]);
else if (j)
The word occurred 1 times - An empty word at the end of file was counted. In order to not count that, add 1 to i only if inside a word at EOF, i. e. change
length = i + 1;
to
length = i + !!j;
This code reads an input text file, and creates an output file based on its contents.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define OUT 0
#define IN 1
#define MAX 28
#define BLOCK 4000
/* Check whether the character is alphanumeric */
int isAlphanumeric(char c) {
return ('a' <= c && c <= 'z') ||
('A' <= c && c <= 'Z') ||
('0' <= c && c <= '9');
}
int main(int argc, char *argv[]) {
int c, state = OUT, length = 0, i, j, counter[MAX];
char word[30], longest_word[30];
FILE *input, *output; /* FILE pointers to open the file */
/* Initialize the counter */
for (i = state; i < MAX; i++)
counter[i] = 0;
/* Open the file */
input = fopen("complete_shakespeare.txt", "r");
output = fopen("word_length_histogram.txt", "w");
/* Keep reading the character in the file */
while ((c = getc(input)) != EOF) {
/* If the character is alphanumeric, record it */
if (isAlphanumeric(c)) {
strncat(word, &c, 1);
}
/* If the character is not alphanumeric, increment the corresponding counter, and additionally, record longest word. */
else {
length = strlen(word);
if (length == 27) strcpy(longest_word, word);
counter[length] += 1;
memset(word, 0, sizeof(word));
}
}
/* If the file ends with a word, record its length */
if (isAlphanumeric(word[0])){
length = strlen(word);
counter[length] += 1;
}
/* print the longest word to the file */
fprintf(output, "%s\n\n", longest_word);
/* Make the histogram */
for (i = 1; i < MAX; i++) {
int dividend = counter[i] / 4000 + 1;
fprintf(output, "%2d %6d ", i, counter[i]);
for (j = dividend; j >= 1; j--){
if (counter[i] != 0)
fprintf(output, "*");
}
fprintf(output, "\n");
}
/* Don't forget to close the FILEs */
fclose(input);
fclose(output);
return 0;
}
It produces the correct output file, but this error comes up whenever I compile it.
B:\CodeBlocks\Projects\Programming in C\hw_4\Homework_4\main.c|44|warning: passing argument 2 of 'strncat' from incompatible pointer type [-Wincompatible-pointer-types]|
The warning seems to come from the only line with strncat. Does anyone know how this can be remedied?
The variable c is declared as having the type int.
int c, state = OUT, length = 0, i, j, counter[MAX];
^^^^^^
So the expression &c used in this call
strncat(word, &c, 1);
has the type int * instead of the type char *.
There is no sense to call strncat for one character. Moreover the array word has indeterminate values because it was not initialized.
char word[30], longest_word[30];
You could write
char word[30], longest_word[30];
word[0] = '\0';
And then something like the following
size_t n = 0;
while ((c = getc(input)) != EOF) {
/* If the character is alphanumeric, record it */
if (isAlphanumeric(c)) {
word[n] = ( char )c;
word[++n] = '\0';
}
/* If the character is not alphanumeric, increment the corresponding counter, and additionally, record longest word. */
else {
if (n == 27) strcpy(longest_word, word);
counter[n] += 1;
n = 0;
word[n] = '\0';
}
}
That is, the variable n will keep track of the current length of the string stored in the array word.
I wrote this code which reads every char of my text and puts it into my char array. My Problem is that the end of the file is not detected and so the fscanf() returns after the end of the text every time the last char until my array is filled. How can I prevent that? I am programming in C.
My Code:
int main() {
char array[50][50];
char buff;
FILE *cola = fopen("C:/Users/danie/Desktop/cola.txt", "r");
for (int i = 0; i < 50; i++) {
for (int k = 0; k < 50; k++) {
fscanf(cola, "%c", &buff);
array[i][k] = buff;
}
}
fclose(cola);
for (int i = 0; i < 50; i++) {
for (int k = 0; k < 50; k++) {
printf("%c", array[i][k]);
}
}
return 0;
}
Thank you for your help.
fscanf() returns the number of successful conversions. You should test the return value and also handle newline characters specifically:
#include <stdio.h>
int main(void) {
char array[50][50];
char buff;
FILE *cola = fopen("C:/Users/danie/Desktop/cola.txt", "r");
if (cola == NULL) {
return 1;
}
for (int i = 0; i < 50; i++) {
for (int k = 0; k < 50; k++) {
if (fscanf(cola, "%c", &buff) != 1 || buff == '\n') {
array[i][k] = '\0';
break;
}
array[i][k] = buff;
}
}
fclose(cola);
for (int i = 0; i < 50; i++) {
for (int k = 0; k < 50 && array[i][k] != '\0'; k++) {
printf("%c", array[i][k]);
}
printf("\n");
}
return 0;
}
The code can be simplified if you use getc() instead of fscanf() to read bytes from the file:
#include <stdio.h>
int main(void) {
char array[50][51];
int c, i, k, n;
FILE *cola = fopen("C:/Users/danie/Desktop/cola.txt", "r");
if (cola == NULL) {
return 1;
}
for (n = 0; n < 50; n++) {
for (k = 0; k < 50; k++) {
if ((c = getc(cola)) == EOF || c == '\n') {
break;
}
array[n][k] = c;
}
array[n][k] = '\0';
if (c == EOF && k == 0)
break;
}
fclose(cola);
for (i = 0; i < n; i++) {
puts(array[i]);
}
return 0;
}
Replace:
for (int i = 0; i < 50; i++) {
for (int k = 0; k < 50; k++) {
fscanf(cola, "%c", &buff);
array[i][k] = buff;
}
}
with:
for (int i = 0; i < 50; i++) {
for (int k = 0; k < 50; k++) {
int c = getc(cola);
if (c == EOF)
break;
array[i][k] = c;
}
}
Since buff is then unused, don't define it. Note that the return type of getc() is an int, not just a char. Always check the I/O function for success/failure. In your original code, you don't even check whether the I/O operation succeeds, which makes detecting EOF impossible.
Note that this code makes a number of assumptions that may or may not be justifiable. For example, you assume each line in the file consists of 49 characters plus a newline; you also assume you'll never need to print the information as a 'string' (your existing code does not; it prints character by character, so it is 'safe').
You might want to describe the input as:
Read up to 50 lines with up to 49 characters plus a newline in each line, storing the result in the variable array with each line being a null-terminated string.
This is more resilient to common problems (short lines, long lines, not enough lines). The code for that might be:
enum { LINE_LEN = 50, NUM_LINES = 50 };
char array[NUM_LINES][LINE_LEN];
int i;
for (i = 0; i < LINE_LEN; i++)
{
int c;
int k;
for (k = 0; k < LINE_LEN; k++)
{
c = getc(cola);
if (c == EOF || c == '\n')
break;
if (k == LINE_LEN - 1)
{
/* Too long - gobble excess */
while ((c = getc(cola)) != EOF && c != '\n')
;
break;
}
array[i][k] = c;
}
array[i][k] = '\0';
if (c == EOF)
break;
}
int num_lines = i; // You have num_lines lines of data in your array
I found one version of the Coca Cola™ ASCII art image at https://www.ascii-code.com/ascii-art/logos/coca-cola.php which looks similar to what you have in your images, but there are many other sources and variants:
__ ___ __ .ama ,
,d888a ,d88888888888ba. ,88"I) d
a88']8i a88".8"8) `"8888:88 " _a8'
.d8P' PP .d8P'.8 d) "8:88:baad8P'
,d8P' ,ama, .aa, .ama.g ,mmm d8P' 8 .8' 88):888P'
,d88' d8[ "8..a8"88 ,8I"88[ I88' d88 ]IaI" d8[
a88' dP "bm8mP8'(8'.8I 8[ d88' `" .88
,88I ]8' .d'.8 88' ,8' I[ ,88P ,ama ,ama, d8[ .ama.g
[88' I8, .d' ]8, ,88B ,d8 aI (88',88"8) d8[ "8. 88 ,8I"88[
]88 `888P' `8888" "88P"8m" I88 88[ 8[ dP "bm8m88[.8I 8[
]88, _,,aaaaaa,_ I88 8" 8 ]P' .d' 88 88' ,8' I[
`888a,. ,aadd88888888888bma. )88, ,]I I8, .d' )88a8B ,d8 aI
"888888PP"' `8""""""8 "888PP' `888P' `88P"88P"8m"
This file's longest line is the first at 67 characters plus newline; the shortest is 61 characters plus newline. The file only has 13 lines and 845 characters (LF line endings) in total. Thus, your program is ill-equipped to deal with this particular data file. It looks for 2,500 characters, and won't get them.
My complete test code was rigged to read from standard input, rather than a fixed file name.
#include <stdio.h>
int main(void)
{
FILE *cola = stdin;
enum { LINE_LEN = 80, NUM_LINES = 50 };
char array[NUM_LINES][LINE_LEN];
int i; // Need value of i after loop
for (i = 0; i < NUM_LINES; i++)
{
int c; // Need value of c after loop
int k;
for (k = 0; k < LINE_LEN; k++)
{
c = getc(cola);
if (c == EOF || c == '\n')
break;
if (k == LINE_LEN - 1)
{
/* Too long - gobble excess */
while ((c = getc(cola)) != EOF && c != '\n')
;
break;
}
array[i][k] = c;
}
array[i][k] = '\0';
if (c == EOF)
break;
}
int num_lines = i; // You have num_lines lines of data in your array
for (i = 0; i < num_lines; i++)
puts(array[i]);
return 0;
}
I tested it on the data file shown, with an empty line at the end, and with a couple of lines containing more than 79 characters after the blank line. It handled all those special cases correctly. Note that handling user input is hard; handling perverse user input is harder. The code is less compact. You could change the rules and then change the code to match. I'm not sure this is the most minimal way to code this; it does work, however. It might be better to have a function to handle the inner input loop; the outer loop could test the return value from that function. This would cut down on the special case handling.
#include <assert.h>
#include <limits.h>
#include <stdio.h>
static int read_line(FILE *fp, size_t buflen, char *buffer)
{
assert(buflen < INT_MAX);
int c; // Need value of c after loop
size_t k; // Need value of k after loop
for (k = 0; k < buflen; k++)
{
if ((c = getc(fp)) == EOF || c == '\n')
break;
if (k == buflen - 1)
{
/* Too long - gobble excess */
while ((c = getc(fp)) != EOF && c != '\n')
;
break;
}
buffer[k] = c;
}
buffer[k] = '\0';
return (k == 0 && c == EOF) ? EOF : (int)k;
}
int main(void)
{
enum { LINE_LEN = 80, NUM_LINES = 50 };
char array[NUM_LINES][LINE_LEN];
int i;
for (i = 0; i < NUM_LINES; i++)
{
if (read_line(stdin, LINE_LEN, array[i]) == EOF)
break;
}
int num_lines = i;
for (i = 0; i < num_lines; i++)
puts(array[i]);
return 0;
}
This produces the same output from the same input as the previous version.
int main() {
//char array[50][50];
char buff;
int t;
FILE *cola = fopen("C:/Users/danie/Desktop/cola.txt", "r");
if (cola == NULL)
{
printf("Cannot open file \n");
exit(0);
}
while (1) {
t = fgetc(cola);
if (t == EOF)
break;
buff = t;
printf("%c", buff);
}
fclose(cola);
return 0;
}
I'm trying to build a program that takes two strings and fills in the edit distance matrix for them. The thing that is tripping me up is, for the second string input, it is skipping over the second input. I've tried clearing the buffer with getch(), but it didn't work. I've also tried switching over to scanf(), but that resulted in some crashes as well. Help please!
Code:
#include <stdio.h>
#include <stdlib.h>
int min(int a, int b, int c){
if(a > b && a > c)
return a;
else if(b > a && b > c)
return b;
else
return c;
}
int main(){
// allocate size for strings
int i, j;
char *input1 = (char*)malloc(sizeof(char)*100);
char *input2 = (char*)malloc(sizeof(char)*100);
// ask for input
printf("Enter the first string: ");
fgets(input1, sizeof(input1), stdin);
printf("\nEnter the second string: ");
fgets(input2, sizeof(input2), stdin);
// make matrix
int len1 = sizeof(input1), len2 = sizeof(input2);
int c[len1 + 1][len2 + 1];
// set up input 2 length
for(i = 0; i < len2 + 1; i++){
c[0][i] = i;
}
// set up input 1 length
for(i = 0; i < len1 + 1; i++){
c[i][0] = i;
}
// fill in the rest of the matrix
for(i = 1; i < len1; i++){
for(j = 1; j < len2; j++){
if(input1[i] == input2[j]) // if the first letters are equal make the diagonal equal to the last
c[i][j] = c[i - 1][j - 1];
else
c[i][j] = 1 + min(c[i - 1][j - 1], c[i - 1][j], c[i][j - 1]);
}
}
// print the matrix
printf("\n");
for(j = 0; j < len2; j++){
for(i = 0; i < len1; i++){
printf("| %d", c[i][j]);
}
printf("\n");
}
return 1;
}
Stick with fgets.
As others have pointed out, use char input1[100] instead of char *input1 = malloc(...)
But, even with that change, which makes the sizeof inside of the fgets correct, using sizeof when setting up len1 and len2 is wrong. You'll be processing an entire buffer of 100, even if their are only 10 valid characters in it (i.e. the remaining ones are undefined/random).
What you [probably] want is strlen [and a newline strip] to get the actual useful lengths.
Here's the modified code [please pardon the gratuitous style cleanup]:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int
min(int a, int b, int c)
{
if (a > b && a > c)
return a;
if (b > a && b > c)
return b;
return c;
}
int
main(void)
{
// allocate size for strings
int i;
int j;
char input1[100];
char input2[100];
// ask for input
printf("Enter the first string: ");
fgets(input1, sizeof(input1), stdin);
int len1 = strlen(input1);
if (input1[len1 - 1] == '\n') {
input1[len1 - 1] = 0;
--len1;
}
printf("\nEnter the second string: ");
fgets(input2, sizeof(input2), stdin);
int len2 = strlen(input2);
if (input2[len2 - 1] == '\n') {
input2[len2 - 1] = 0;
--len2;
}
// make matrix
int c[len1 + 1][len2 + 1];
// set up input 2 length
for (i = 0; i < len2 + 1; i++) {
c[0][i] = i;
}
// set up input 1 length
for (i = 0; i < len1 + 1; i++) {
c[i][0] = i;
}
// fill in the rest of the matrix
for (i = 1; i < len1; i++) {
for (j = 1; j < len2; j++) {
// if the 1st letters are equal make the diagonal equal to the last
if (input1[i] == input2[j])
c[i][j] = c[i - 1][j - 1];
else
c[i][j] = 1 + min(c[i - 1][j - 1], c[i - 1][j], c[i][j - 1]);
}
}
// print the matrix
printf("\n");
for (j = 0; j < len2; j++) {
for (i = 0; i < len1; i++) {
printf("| %d", c[i][j]);
}
printf("\n");
}
return 1;
}
UPDATE:
Okay sweet I see what you mean! The reason I was trying to use malloc though was to avoid making the matrix that I had to print a size of 100x100 blank spaces.
With either the fixed size input1 or the malloced one, fgets will only fill it to the input size entered [clipped to the second argument, if necessary]. But, it does not pad/fill the remainder of the buffer with anything (e.g. spaces on the right). What it does do is add an EOS [end-of-string] character [which is a binary 0x00] after the last char read from input [which is usually the newline].
Thus, if the input string is: abcdef\n, the length [obtainable from strlen] is 7, input[7] will be 0x00, and input1[8] through input1[99] will have undefined/random/unpredictable values and not spaces.
Since a newline char isn't terribly useful, it is often stripped out before further processing. For example, it isn't terribly relevant when computing edit distance for a small phrase.
Does using strlen() only count the number of chars inside the array, or does it include all the blank spaces too?
As I mentioned above, fgets does not pad the string at the end, so, not to worry. It will do what you want/expect.
strlen only counts chars up to [but not including the EOS terminator character (i.e.) zero]. If some of these chars happen to be spaces, they will be counted by strlen--which is what we want.
Consider computing the edit distance between any two of the following phrases:
quick brown fox jumped over the lazy dogs
the quick brown fox jumped over lazy dogs
quick brown fox jumps over the lazy dog
In each case, we want strlen to include the [internal/embedded] spaces in the length calculation. That's because it is perfectly valid to compute the edit distance of phrases.
There is a valid usage for malloc: when the amount of data is too big to fit on the stack. Most systems have a default limit (e.g. under linux, it's 8 MB).
Suppose we were computing the edit distance for two book chapters [read from files], we'd have (e.g.):
char input1[50000];
char input2[50000];
The above would fit, but the c matrix would cause a stack overflow:
int c[50000][50000];
because the size of this would be 50000 * 50000 * 4 which is approx 9.3 GB.
So, to fit all this data, we'd need to allocate it on the heap. While it is possible to do a malloc for c and maintain the 2D matrix access, we'd have to create a function and pass off the pointer to c to it.
So, here's a modified version that takes input of arbitrarily large sizes:
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#define sysfault(_fmt...) \
do { \
fprintf(stderr,_fmt); \
exit(1); \
} while (0)
#define C(y,x) c[((y) * (len2 + 1)) + (x)]
long
min(long a, long b, long c)
{
if (a > b && a > c)
return a;
if (b > a && b > c)
return b;
return c;
}
char *
input(const char *prompt,long *lenp,const char *file)
{
FILE *fp;
char *lhs;
int chr;
long siz;
long len;
if (file != NULL)
fp = fopen(file,"r");
else {
fp = stdin;
printf("Enter %s string: ",prompt);
fflush(stdout);
}
lhs = NULL;
siz = 0;
len = 0;
while (1) {
chr = fgetc(fp);
if (chr == EOF)
break;
if ((chr == '\n') && (file == NULL))
break;
// grow the character array
if ((len + 1) >= siz) {
siz += 100;
lhs = realloc(lhs,siz);
if (lhs == NULL)
sysfault("input: realloc failure -- %s\n",strerror(errno));
}
lhs[len] = chr;
len += 1;
}
if (file != NULL)
fclose(fp);
if (lhs == NULL)
sysfault("input: premature EOF\n");
// add the EOS
lhs[len] = 0;
// return the length to the caller
*lenp = len;
return lhs;
}
int
main(int argc,char **argv)
{
long i;
long j;
char *input1;
long len1;
char *input2;
long len2;
long *c;
--argc;
++argv;
switch (argc) {
case 2:
input1 = input("first",&len1,argv[0]);
input2 = input("second",&len2,argv[1]);
break;
default:
input1 = input("first",&len1,NULL);
input2 = input("second",&len2,NULL);
break;
}
// make matrix
c = malloc(sizeof(*c) * (len1 + 1) * (len2 + 1));
if (c == NULL)
sysfault("main: malloc failure -- %s\n",strerror(errno));
// set up input 2 length
for (i = 0; i < len2 + 1; i++) {
C(0,i) = i;
}
// set up input 1 length
for (i = 0; i < len1 + 1; i++) {
C(i,0) = i;
}
// fill in the rest of the matrix
for (i = 1; i < len1; i++) {
for (j = 1; j < len2; j++) {
// if the 1st letters are equal make the diagonal equal to the last
if (input1[i] == input2[j])
C(i,j) = C(i - 1,j - 1);
else
C(i,j) = 1 + min(C(i - 1,j - 1), C(i - 1,j), C(i,j - 1));
}
}
// print the matrix
printf("\n");
for (j = 0; j < len2; j++) {
for (i = 0; i < len1; i++) {
printf("| %ld", C(i,j));
}
printf("\n");
}
return 1;
}
I have rjecnik.txt file that looks like this
mate sime, jure
stipica gujo, prvi
ante mirkec
goran maja, majica
avion kuca, brod, seoce
amerika, neka, zemlja, krcma
brodarica, zgrada, zagreb
zagreb split
zadar rijeka
andaluzija azija
I need to order lines alphabetically (not words) and my program produces this result which is not correct:
andaluzija azijamate sime, jure
amerika, neka, zemlja, krcma
brodarica, zgrada, zagreb
ante mirkec
avion kuca, brod, seoce
goran maja, majica
stipica gujo, prvi
zadar rijeka
zagreb split
Press [Enter] to close the terminal ...
When I use non ascii character like kuća for kuca or krčma for krcma it produces this result (all wrong)
andaluzija azijamate sime, jure
amerika, neka, zemlja, krŔma
brodarica, zgrada, zagreb
ante mirkec
avion kuŠa, brod, seoce
goran maja, majica
stipica gujo, prvi
zadar rijeka
zagreb split
Press [Enter] to close the terminal ...
This is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
int ch, nl = 1, min, lenght1, lenght2, lenght;//ch will hold characters, min is for selection sort, lenght holds value of strlen for determine wthat line is longer
FILE * fp;// FILE pointer
char * lines[1000];//that will dynamically hold strings for lines
char * temp;//for lines swaping
if((fp = fopen("C:\\Users\\don\\Documents\\NetBeansProjects\\proba2\\dist\\Debug\\MinGW-Windows\\rjecnik.txt", "r")) == NULL)//I had to temporarily put full path to rjecnik.txt
{
printf("Can't open file...");
exit(1);
}
while((ch = getc(fp)) != EOF)//count lines
{
if(ch == '\n')
nl++;
}
int i, j;
for (i = 0; i < nl; i++)
lines[i] = malloc(1000);//create array of string size value of nl
fseek(fp, 0L, SEEK_SET);//go to start of file
i = 0;
j = 0;
while((ch = getc(fp)) != EOF)//fill arrays of string
{
lines[i][j] = ch;
j++;
if(ch == '\n')
{
j = 0;
i++;
}
}
for(i = 0; i < nl - 1; i++)//selection sort doesn't work properly
{
min = i;//min is i
for(j = i + 1; j < nl; j++)//for number of lines(nl) times
{
lenght1 = strlen(lines[i]);//find what string is longer and lenght is smaller one
lenght2 = strlen(lines[j]);
if(lenght1 < lenght2)
lenght = lenght1;
else
lenght = lenght2;
if(strncmp(lines[i], lines[j], lenght) > 0 )//compare two strings
min = j;//if second string is alphabetically smaller min is j
}
temp = lines[i];// swapping
lines[i] = lines[min];
lines[min] = temp;
}
for(i = 0; i < nl; i++ )//printing to console
{
lenght1 = strlen(lines[i]);
for(j = 0; j < lenght1; j++ )
{
putchar(lines[i][j]);
}
}
return 0;
}
Now program crashes at the end when I add this code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
int ch, nl = 1, min, lenght1, lenght2, lenght;//ch will hold characters, min is for selection sort, lenght holds value of strlen for determine wthat line is longer
FILE * fp;// FILE pointer
char * lines[1000];//that will dynamically hold strings for lines
char * temp;//for lines swaping
if((fp = fopen("C:\\Users\\don\\Documents\\NetBeansProjects\\proba2\\dist\\Debug\\MinGW-Windows\\rjecnik.txt", "r")) == NULL)//I had to temporarily put full path to rjecnik.txt
{
printf("Can't open file...");
exit(1);
}
while((ch = getc(fp)) != EOF)//count lines
{
if(ch == '\n')
nl++;
}
int i, j;
for (i = 0; i < nl; i++)
lines[i] = malloc(1000);//create array of string size value of nl
fseek(fp, 0L, SEEK_SET);//go to start of file
i = 0;
j = 0;
while((ch = getc(fp)) != EOF)//fill arrays of string
{
lines[i][j] = ch;
j++;
if(ch == '\n')
{
j = 0;
i++;
}
}
for(i = 0; i < nl - 1; i++)//selection sort doesn't work properly
{
min = i;//min is i
for(j = i + 1; j < nl; j++)//for number of lines(nl) times
{
lenght1 = strlen(lines[i]);//find what string is longer and lenght is smaller one
lenght2 = strlen(lines[j]);
if(lenght1 < lenght2)
lenght = lenght1;
else
lenght = lenght2;
if(strncmp(lines[min], lines[j], lenght ) > 0 )//compare two strings
min = j;//if second string is alphabetically smaller min is j
}
temp = lines[i];// swapping
lines[i] = lines[min];
lines[min] = temp;
}
for(i = 0; i < nl; i++ )//printing to console
{
lenght1 = strlen(lines[i]);
for(j = 0; j < lenght1; j++ )
{
putchar(lines[i][j]);
}
}
for (i = 0; i < 100; i++)//Program crashes here
free(lines[i]);
return 0;
}
1.- You must initialize lines to 0 after malloc so strlen works properly.
2.- Compare lines[j] with lines[min]
3.- Don't forget free lines
You're always comparing lines[j] to lines[i], but you should be comparing it to lines[min].
If this isn't you learning about how to sort and get input, c provides qsort() and fgets(), so you could
int strsort(const void *a, const void *b)
{
char *const*astr=a, *const*bstr=b;
return strcmp(*astr, *bstr);
}
main()
{
FILE*f = fopen(...);
char (*arr)[1000] = malloc(1000*1000);
int x;
for(x=0;x<1000 && fgets(1000, arr[x], f);x++)
arr[x][strlen(arr[x])-2] = '\0'; //strip newlines
qsort(arr, x, 1, strsort);
int i;
for(i=0; i<x; i++)
printf("%s\n", arr[x]);
}
It's much clearer what you're doing this way.
Minor nitpick:
lenght1 = strlen(lines[i]);
lenght2 = strlen(lines[j]);
if(lenght1 < lenght2)
lenght = lenght1;
else
lenght = lenght2;
if(strncmp(lines[i], lines[j], lenght) > 0 )
... ;
You don't need this: strcmp() stops when either of the strings terminates, whichever comes first. In your case, you need to compare one more character (the NUL), like
strncmp( lines[i], lines[j], lenght+1)
, otherwise "apple" and "apples" would compare equal (because only the first five characters would be compared). But the "normal" form:
strcmp(lines[i], lines[j])
does exactly what you want.