input files in C - c

Code implements the dynamic programming solution for global pairwise alignment of two sequences. Trying to perform a semi-global alignment between the SARS-CoV-2 reference genome and the first read in the Nanopore sample. The length of the reference genome is 29903 base pairs and the length of the first Nanopore read is 1246 base pairs. When I run the following code, I get this message in my terminal:
Usage: align < input file >
How do I add the necessary files to the code. The file names are SARS-CoV-2 reference genome.txt and Nanopore.txt, where A = SARS-CoV-2 reference genome.txt file and B = Nanopore.txt file
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define GAP -2
#define MATCH 5
#define MISMATCH -3
#define MAXLENGTH_A 29904
#define MAXLENGTH_B 1247
int max(int A, int B, int C)
{
if (A>=B && A>=C) return A;
else if (B>=A && B>=C) return B;
else return C;
}
char Tmax(int A, int B, int C)
{
if (A>B && A>C) return 'D';
else if (B>A && B>C) return 'L';
else return 'U';
}
int m(char p, char q)
{
if (p==q) return MATCH;
else return MISMATCH;
}
void append(char *st,int L,char c)
{
int i;
for (i=L;i>0;i--)
st[i]=st[i-1];
st[L+1] = '\0';
st[0] = c;
}
int main(int argc, char **argv)
{
FILE *fp;
char A[1000];
char B[1000];
char RA[1000];
char RM[1000];
char RB[1000];
int N,M,L;
int i,j;
//int S[MAXLENGTH_A][MAXLENGTH_B];
//char T[MAXLENGTH_A][MAXLENGTH_B];
int **S;
char **T;
S = (int**)malloc(sizeof(int*)*MAXLENGTH_A);
for (int i = 0; i<29904; i++)
S[i] = (int*)malloc(sizeof(int)*MAXLENGTH_B);
T = (char**)malloc(sizeof(char*)*MAXLENGTH_A);
for (int i = 0; i<29904; i++)
T[i] = (char*)malloc(sizeof(char)*MAXLENGTH_B);
if (argc!=2)
{
printf("Usage: align <input file>\n");
exit(1);
}
fp = fopen(argv[2],"r");
if (fp==NULL)
{
printf("input file not found.\n");
exit(1);
}
fscanf(fp,"%s",A);
fscanf(fp,"%s",B);
printf("Sequence A: %s\n",A);
printf("Sequence B: %s\n",B);
N = strlen(A);
M = strlen(B);
S[0][0] = 0;
T[0][0] = 'D';
// initialize first column
for (i=0;i<=N;i++)
{
S[i][0] = GAP*i;
T[i][0] = 'U';
}
//initialize the firt row
for (i=0;i<=M;i++)
{
S[0][i] = GAP*i;
T[0][i] = 'L';
}
for (i=1;i<=N;i++)
for (j=1;j<=M;j++)
{
S[i][j] = max(S[i-1][j-1]+m(A[i-1],B[j-1]),S[i][j-1]+GAP,S[i-1][j]+GAP);
T[i][j] = Tmax(S[i-1][j-1]+m(A[i-1],B[j-1]),S[i][j-1]+GAP,S[i-1][j]+GAP);
}
printf("The score of the alignment is : %d\n",S[N][M]);
i=N;
j=M;
L=0;
RA[0]='\0';
RB[0]='\0';
RM[0]='\0';
while (i!=0 || j!=0)
{
if (T[i][j]=='D')
{
append(RA,L,A[i-1]);
append(RB,L,B[j-1]);
if (A[i-1]==B[j-1]) append(RM,L,'|');
else append(RM,L,'*');
i--; j--;
}
else if (T[i][j]=='L')
{
append(RA,L,'-');
append(RB,L,B[j-1]);
append(RM,L,' ');
j--;
}
else if (T[i][j]=='U')
{
append(RA,L,A[i-1]);
append(RB,L,'-');
append(RM,L,' ');
i--;
}
L++;
}
printf("%s\n",RA);
printf("%s\n",RM);
printf("%s\n",RB);
}

These lines of the program
printf("Usage: align <input file>\n");
/* AND */
fp = fopen(argv[2],"r");
/* AND */
fscanf(fp,"%s",A);
fscanf(fp,"%s",B);
show the program expects to read from one file.
From your question, it seems you could make a third data file with one line containing the 29903 characters without spaces or breaks of the first sequence followed by a second line containing 1246 characters (again continuous characters) of the second sequence. But, don't do this...
When the program is run (eg: ./a.out filename) and one supplies the name of the 'combined' file, the first fscanf() will attempt to load the first block of characters into the array named 'A'. The second fscanf() would load the second block into the array named 'B'.
This is a problem because both 'A' and 'B' are dimensioned to hold only 1000 bytes each, maximum...
As this code is right now, you cannot use it to load the long sequences you want to. Sorry.

Related

How to add space between the characters if two consecutive characters are equal in c?

I need to add add space if two consecutive characters are same.
For example:
input:
ttjjjiibbbbhhhhhppuuuu
Output:
t tjjji ibbbbhhhhhp puuuu
If the two consecutive characters are same then need to print space between two consecutive characters....if the consecutive characters are greater than two no need to add space.
My code:
#include <stdio.h>
#include <string.h>
int main()
{
char s[100]="ttjjjiibbbbhhhhhppuuuu";
for(int i=0;i<strlen(s);i++){
if(s[i]!=s[i-1] && s[i]==s[i+1]){
s[i+1]=' ';
}
}
printf("%s",s);
}
my output:
t j ji b b h h hp u u
What mistake i made??
Your primary mistake is writing to your input when the string needs to grow. That's not going to work well and is hard to debug.
This is typical of C Code: measure once, process once. Same-ish code appears twice.
Variables:
int counter;
char *ptr1;
char *ptr2;
char *t;
Step 1: measure
for (ptr1 = s; *ptr1; ptr1++)
{
++counter;
if (ptr1[0] == ptr1[1] && ptr1[0] != ptr1[2] && (ptr1 == s || ptr1[-1] != ptr1[0]))
++counter;
}
Step 2: copy and process
t = malloc(counter + 1);
for (ptr1 = s, ptr2 = t; *ptr1; ptr1++)
{
*ptr2++ = *ptr1;
if (ptr1[0] == ptr1[1] && ptr1[0] != ptr1[2] && (ptr1 == s || ptr1[-1] != ptr1[0]))
*ptr2++ = ' ';
}
ptr2[0] = '\0';
Another solution: Calculate the length of consective characters and handle the special case(Length == 2).
#include <stdio.h>
#include <string.h>
int main(int argc, char **argv) {
char s[100] = "ttjjjiibbbbhhhhhppuuuu";
char tmp_ch = s[0];
int cnt = 1;
for (int i = 1; i < strlen(s); i++) {
while (s[i] == tmp_ch) {
cnt++;
i++;
if (i == strlen(s)) {
break;
}
}
if (cnt == 2) {
putchar(tmp_ch);
putchar(' ');
putchar(tmp_ch);
} else {
for (int j = 0; j < cnt; j++) {
putchar(tmp_ch);
}
}
tmp_ch = s[i];
cnt = 1;
}
return 0;
}
Another approach is to use strspn() to get the number of consecutive characters as you work down the string. The prototype for strspn() is:
size_t strspn(const char *s, const char *accept);
Where strspn() returns the number of bytes in the initial segment of s which consist only of bytes from accept. (e.g. using the current character in a 2-character string as accept, it gives the number of times that character appears in sequence)
Tracking the number of charters returned and updating an offset from the beginning allows you to simply loop letting strspn() do the work as you work though your string. All you are concerned with is when strspn() returns 2 identifying where two, and only two, of the same character are adjacent to one another.
You can do:
#include <stdio.h>
#include <string.h>
int main (void) {
char *input = "ttjjjiibbbbhhhhhppuuuu";
char chstr[2] = {0}; /* 2 char string for accept parameter */
size_t nchr = 0, offset = 0; /* no. chars retured, current offset */
*chstr = input[offset]; /* initialize with 1st char */
/* while not at end, get number of consecutive character(s) */
while (*chstr && (nchr = strspn (input + offset, chstr))) {
if (nchr == 2) { /* if 2 - add space */
putchar (input[offset]);
putchar (' ');
putchar (input[offset]);
}
else { /* otherwise, loop nchr times outputting char */
size_t n = nchr;
while (n--)
putchar(input[offset]);
}
offset += nchr; /* add nchr to offset */
*chstr = input[offset]; /* store next char in string */
}
putchar ('\n'); /* tidy up with newline */
}
Example Use/Output
$ /bin/space_between_2
t tjjji ibbbbhhhhhp puuuu
Let me know if you have further questions concerning the use of strspn().

Segmentation Fault when My Code Executes the printf() in c

below I have posted my code. When I compile I receive no errors, and only one warning about variables I haven't used yet. the code works all the way to the line in code where it starts to print. I have tested all the sections and I believe that one is at fault. please let me know what I am doing wrong so I can fix it.
#include <stdio.h>
#include <string.h>
#define NUM_LINES 37
#define LINE_LENGTH 60
void select_sort_str(char list[NUM_LINES][LINE_LENGTH], int n);
int alpha_first(char list[NUM_LINES][LINE_LENGTH], int min_sub, int max_sub);
int main (void){
//store each line in an array of strings
FILE *inp;
FILE *outp;
char hurr[NUM_LINES][LINE_LENGTH];
;
inp = fopen("hurricanes.csv","r");
outp = fopen("out.txt","w");
//read in lines from file
for (int i = 0; i<NUM_LINES; i++){
fgets(hurr[i], LINE_LENGTH, inp);
}
inp = fopen("hurricanes.cvs","r");
//printf("%s", hurr[0]);
//define function
select_sort_str(hurr, NUM_LINES);
return(0);
}
int
alpha_first(char list[NUM_LINES][LINE_LENGTH], // input - array of pointers to strings
int min_sub, // input - min and max subscripts of
int max_sub) // portion of list to consider
{
int first, i;
first = min_sub;
for (i = min_sub + 1; i <= max_sub; ++i) {
if (strcmp(list[i], list[first]) < 0) {
first = i;
}
}
return (first);
}
/*
* Orders the pointers in an array list so they access strings in
* alphabetical order
* Pre: first n elements of list reference string of uniform case;
* n >= 0
*/
void
select_sort_str(char list[NUM_LINES][LINE_LENGTH], // input/output - array of pointers being
// ordered to acces strings alphabetically
int n) // input - number of elements to sort
{
int fill, // index of element to contain next string in order
index_of_min; // index of next string in order
char *temp;
char temp1[NUM_LINES][LINE_LENGTH];
for (fill = 0; fill < n - 1; ++fill) {
index_of_min = alpha_first(list, fill, n - 1);
if (index_of_min != fill) {
temp = list[index_of_min];
list[index_of_min][LINE_LENGTH] = list[fill][LINE_LENGTH];
strncpy(temp1[index_of_min], list[index_of_min], LINE_LENGTH);
temp1[fill][LINE_LENGTH] = *temp;
}
}
char *name;
char *cat = 0;
char *date;
for (int i = 0; i<NUM_LINES; i++){
name = strtok(NULL, ",");
cat = strtok(NULL, "h");
date = strtok(NULL, " ");
printf("%s %s %s\n", name, cat, date);
}
// for( int i =0; i<NUM_LINES; i++){
// printf("%s", list[i]);
// }
}
The only first parameter you ever pass to strtok is NULL. You never actually give it anything to parse. Did you perhaps mean strtok(temp1[i], ",");?
Also, why no error checking? It's much easier to find bugs in code with error checking.

Checking for null/empty float values when using sscanf

The following program attempts to read an input file line by line using fgets, and save each comma delimited float value into an array of structs using sscanf (this aspect of the code works fine). The issue lies in that the program should also detect when a float value is missing/empty, and assign it the float value 1.500 which then is saved into the array of structs.
EDIT: This is supposed to be compiled using VS2017, so on Windows.
*Note: Please note that the following questions have been studied before posting this question:
How to check if a string returned by scanf is null
How to get scanf to continue with empty scanset
An example of the input file (missing value in the second row):
0.123f, 0.234f, 0.345f, 0.456f, 0.567f
1.987f, , 7.376f, 2.356f, 5.122f
9.111f, 1.234f, 7.091f, 6.672f, 9.887f
Desired output (missing value in second row is detected and set to 1.500):
0.123 0.234 0.345 0.456 0.567
1.987 1.500 7.376 2.356 5.122
9.111 1.234 7.091 6.672 9.887
So far, the first attempt tried to scan all 5 floats (each with 'f' suffix) into strings and then check to see if those strings are null/empty or of zero length using strcmp and strlen, respectively, and finally involved trying to use sscanf again on each of those variables to read each into an array of structs.
The 2nd attempt included a check to see if the sscanf was successful by using if (sscanf(line, "%ff", &data[i].x) == NULL) { // ...some alert and assign 1.500}, which did not work either. The 3rd attempt, as seen below:
#include "stdio.h"
int main() {
typedef struct {
float x, y, vx, vy, mass;
}DATA;
FILE *file = fopen("null_detector.txt", "r");
if (file == NULL)
{
printf(stderr, "ERROR: file not opened.\n");
return EXIT_FAILURE;
}
int N= 3;
DATA* data = malloc(Nbodies * sizeof * data); // Array allocation
char line[256];
int i;
int inc = 1;
for (i = 0; i < Nbodies; i += inc)
{
fgets(line, sizeof(line), file);
// **Some info:
// Scan 5 float variables per line (this part works fine)
sscanf(line, "%ff, %ff, %ff, %ff, %ff",
&data[i].x, &data[i].y, &data[i].vx, &data[i].vy, &data[i].mass); // %ff accounts for 'f' suffix
// Now check if any of above vars are empty/NULL.
// NOTE: aware that these vars CANNOT be compared to NULL,
// but has been included to try and provide clarity for end goal
if (data[i].x == NULL)
{
//.. assign 1.500 to data[i].x
}
if (data[i].y == NULL)
{
//... same as above etc
}
// ...Repeat IF statements for all 5 vars
}
//Print the contents of array of structs to check for correct output
for (i = 0; i < Nbodies; i++)
{
printf("%.3f %.3f %.3f %.3f %.3f\n", data[i].x, data[i].y, data[i].vx, data[i].vy, data[i].mass);
}
return 0;
}
Summary:
Does anyone know how this program can be modified to:
detect missing float values in each line of the file upon reading them with fgets
replace missing float values with the float value 1.500
write these values to the array of structs, like the non-missing values successfully are doing?
As commented in the code, I am aware that the struct float variables cannot be compared to NULL. I have included this comparison in the code to only try to add some clarity as to what the end goal is.
You can use strsep to separate each line.
str = strsep(&line, ",")
Using one function to set the value of data:
void set_data(DATA *dt, int count, float f) {
switch(count) {
case 0: dt->x = f; break;
case 1: dt->y = f; break;
case 2: dt->vx = f; break;
case 3: dt->vy = f; break;
case 4: dt->mass = f; break;
}
}
The complete code:
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
typedef struct {
float x, y, vx, vy, mass;
}DATA;
void set_data(DATA *dt, int count, float f) {
switch(count) {
case 0: dt->x = f; break;
case 1: dt->y = f; break;
case 2: dt->vx = f; break;
case 3: dt->vy = f; break;
case 4: dt->mass = f; break;
}
}
int main() {
FILE *file = fopen("text.txt", "r");
if (file == NULL)
{
printf( "ERROR: file not opened.\n");
return EXIT_FAILURE;
}
int N= 3;
DATA* data = malloc(N * sizeof(data)); // Array allocation
char *line;
int i;
int inc = 1;
size_t n = 0;
for (i = 0; i < N; i += inc)
{
getline(&line, &n, file);
int count = 0;
char *str;
while((str = strsep(&line, ",")) != NULL) {
if (strcmp(str, " ") == 0) {
set_data(&data[i], count, 1.5);
} else {
set_data(&data[i], count, atof(str));
}
// printf("count = %d\n", count);
// printf("token: %s\n", str);
count++;
}
}
//Print the contents of array of structs to check for correct output
for (i = 0; i < N; i++)
{
printf("%.3f %.3f %.3f %.3f %.3f\n", data[i].x, data[i].y, data[i].vx, data[i].vy, data[i].mass);
}
return 0;
}
The input:
#cat text.txt
0.123f, 0.234f, 0.345f, 0.456f, 0.567f
1.987f, , 7.376f, 2.356f, 5.122f
9.111f, 1.234f, 7.091f, 6.672f, 9.887
The output:
0.123 0.234 0.345 0.456 0.567
1.987 1.500 7.376 2.356 5.122
9.111 1.234 7.091 6.672 9.887
It can also achieved with only sscanf if there is at least a space between the commas when there is an absence of an input value.
#include <stdio.h>
int main(void) {
char *str[] = {"0.123f, 0.234f, 0.345f, 0.456f, 0.567f",
"1.987f, , 7.376f, 2.356f, 5.122f",
"9.111f, 1.234f, 7.091f, 6.672f, 9.887f"};
float float_arr[3][5];
char temp[5][7];
for (unsigned i = 0; i < 3; i++) {
if (5 != sscanf(str[i], "%6[^,],%6[^,],%6[^,],%6[^,],%6[^,]",
temp[0], temp[1], temp[2], temp[3], temp[4]))
return printf("Error\n"), 1;
for (unsigned j = 0; j < 5; j++)
if (1 != sscanf(temp[j], "%ff", &float_arr[i][j]))
float_arr[i][j] = 1.500f;
}
// printing the result
for (unsigned i = 0; i < 3; i++) {
for (unsigned j = 0; j < 5; j++)
printf("%ff ", float_arr[i][j]);
printf("\n");
}
return 0;
}
Output
0.123000f 0.234000f 0.345000f 0.456000f 0.567000f
1.987000f 1.500000f 7.376000f 2.356000f 5.122000f
9.111000f 1.234000f 7.091000f 6.672000f 9.887000f

How to check first letter of one string with last letter of another string inside of same char array

How can I complete the function canArrangeWords() ?
Question : Given a set of words check if we can arrange them in a list such that the last letter of any word and first letter of another word are same. The input function canArrangeWords shall contain an integer num and array of words arr. num denotes the number of word in the list (1<=num<=100). arr shall contain words consisting of lower case letters between 'a' - 'z' only . return 1 if words can be arranged in that fashion and -1 if cannot.
Input : 4 pot ten nice eye
output : 1
input : 3 fox owl pond
output: -1
Please help me complete this program .
**
#include<stdio.h>
#include<string.h>
int canArrangewords(int,char [100][100]);
void main(){
int n ,count=0 , i ;
char arrayS[100][100];
scanf("%d",&n);
for (i = 0; i < n; ++i)
{
scanf("%s",arrayS[i]);
}
for(i=0;i<n;i++)
{
printf("%s",arrayS[i]);
printf("\n");
}
printf("%c\n",arrayS[2][4]);
canArrangewords(n , arrayS);
}
int canArrangewords(int n,char arrayS[100][100]){
int i , j ;
for ( i = 0; i < n; i++)
{
for ( j = i+1 ; j < strlen(arrayS[j+1]); i++)
{
int flag = strlen(arrayS[j+1]) - 1;
int temp = strcmp(arrayS[i][0],arrayS[j][flag]);
}
}
}
}
Well, first of all think of the way you can reach that answer.
If you only need to know if they can or can not be arranged and you do not have to do so your self you can use an empty array of int array[26] for each letter a-z.
The rule is that from all the first and last letters for all the words only two MAY appear an odd amount of times - the first letter of first word in list and the last letter in the last word in the list, the rest MUST appear an even amount of times. I would add a check to make sure the letters are lowercase as well. good luck!
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MINASCII 97
#define LETTERS 26
void UpdateArray(char letter, int* arr)
{
if(arr[letter - MINASCII] == 0)
{
++arr[letter - MINASCII];
}
else
{
--arr[letter - MINASCII];/*for each second time same letter is seen reduce back to zero */
}
}
int canArrangewords(int wordNum, char* wordArr[])
{
int arr[LETTERS] = {0};
int i = 0;
int count = 0 ;
char first;
char last;
char* string;
for (i= 0; i< wordNum; ++i)
{
string = wordArr[i];
first = string[0];
last = string[strlen(string)-1];
UpdateArray(first, &arr[0]);
UpdateArray(last, &arr[0]);
}
for(i = 0; i< LETTERS; ++i)
{
count+=arr[i];
}
if(count == 2 || count == 0)/*either once each or twice -see word1 example in main*/
{
return 1;
}
return -1;
}
int main()
{
int i = 0;
char* words[] = {"pot", "ten", "nice", "eye"};
char* words1[] = {"pot", "ten", "nip"};
char* words2[] = {"fox", "owl", "pond"};
i = canArrangewords(4,words);
printf("%d\n", i);
i = canArrangewords(3,words1);
printf("%d\n", i);
i = canArrangewords(3,words2);
printf("%d\n", i);
return 0;
}
Change your array of words into an array of pointers to words. Then you can simply exchange the pointers.
To speed things up, instead of a pointer to a word, have it point to a structure:
struct WORD {
char *firstchar; // begin of word
char *lastchar; // last char of word
} *words[100]; // array of 100 pointers to words
To read the words:
char buf[100];
for (i = 0; i < n; ++i)
{
scanf("%s",buf);
int len= strlen(buf);
words[i]= malloc(sizeof(struct WORDS));
words[i]->firstchar= malloc(len+1);
strcpy(words[i]->firstchar, buf);
words[i]->lastchar= words[i]->firstchar + len-1;
}
Now compare and sort:
if (*words[i]->lastchar == *words[j]->firstchar) {
struct WORDS *tmp= words[i+1];
words[i+1]= words[j];
words[j]= tmp;
}
Do this in a loop, a kind of bubble sort. I leave that to you.

Attempting to split and store arrays similar to strtok

For an assignment in class, we have been instructed to write a program which takes a string and a delimiter and then takes "words" and stores them in a new array of strings. i.e., the input ("my name is", " ") would return an array with elements "my" "name" "is".
Roughly, what I've attempted is to:
Use a separate helper called number_of_delimeters() to determine the size of the array of strings
Iterate through the initial array to find the number of elements in a given string which would be placed in the array
Allocate storage within my array for each string
Store the elements within the allocated memory
Include directives:
#include <stdlib.h>
#include <stdio.h>
This is the separate helper:
int number_of_delimiters (char* s, int d)
{
int numdelim = 0;
for (int i = 0; s[i] != '\0'; i++)
{
if (s[i] == d)
{
numdelim++;
}
}
return numdelim;
}
`This is the function itself:
char** split_at (char* s, char d)
{
int numdelim = number_of_delimiters(s, d);
int a = 0;
int b = 0;
char** final = (char**)malloc((numdelim+1) * sizeof(char*));
for (int i = 0; i <= numdelim; i++)
{
int sizeofj = 0;
while (s[a] != d)
{
sizeofj++;
a++;
}
final[i] = (char*)malloc(sizeofj);
a++;
int j = 0;
while (j < sizeofj)
{
final[i][j] = s[b];
j++;
b++;
}
b++;
final[i][j+1] = '\0';
}
return final;
}
To print:
void print_string_array(char* a[], unsigned int alen)
{
printf("{");
for (int i = 0; i < alen; i++)
{
if (i == alen - 1)
{
printf("%s", a[i]);
}
else
{
printf("%s ", a[i]);
}
}
printf("}");
}
int main(int argc, char *argv[])
{
print_string_array(split_at("Hi, my name is none.", ' '), 5);
return 0;
}
This currently returns {Hi, my name is none.}
After doing some research, I realized that the purpose of this function is either similar or identical to strtok. However, looking at the source code for this proved to be little help because it included concepts we have not yet used in class.
I know the question is vague, and the code rough to read, but what can you point to as immediately problematic with this approach to the problem?
The program has several problems.
while (s[a] != d) is wrong, there is no delimiter after the last word in the string.
final[i][j+1] = '\0'; is wrong, j+1 is one position too much.
The returned array is unusable, unless you know beforehand how many elements are there.
Just for explanation:
strtok will modify the array you pass in! After
char test[] = "a b c ";
for(char* t = test; strtok(t, " "); t = NULL);
test content will be:
{ 'a', 0, 'b', 0, 'c', 0, 0 }
You get subsequently these pointers to your test array: test + 0, test + 2, test + 4, NULL.
strtok remembers the pointer you pass to it internally (most likely, you saw a static variable in your source code...) so you can (and must) pass NULL the next time you call it (as long as you want to operate on the same source string).
You, in contrast, apparently want to copy the data. Fine, one can do so. But here we get a problem:
char** final = //...
return final;
void print_string_array(char* a[], unsigned int alen)
You just return the array, but you are losing length information!
How do you want to pass the length to your print function then?
char** tokens = split_at(...);
print_string_array(tokens, sizeof(tokens));
will fail, because sizeof(tokens) will always return the size of a pointer on your local system (most likely 8, possibly 4 on older hardware)!
My personal recommendation: create a null terminated array of c strings:
char** final = (char**)malloc((numdelim + 2) * sizeof(char*));
// ^ (!)
// ...
final[numdelim + 1] = NULL;
Then your print function could look like this:
void print_string_array(char* a[]) // no len parameter any more!
{
printf("{");
if(*a)
{
printf("%s", *a); // printing first element without space
for (++a; *a; ++a) // *a: checking, if current pointer is not NULL
{
printf(" %s", *a); // next elements with spaces
}
}
printf("}");
}
No problems with length any more. Actually, this is exactly the same principle C strings use themselves (the terminating null character, remember?).
Additionally, here is a problem in your own code:
while (j < sizeofj)
{
final[i][j] = s[b];
j++; // j will always point behind your string!
b++;
}
b++;
// thus, you need:
final[i][j] = '\0'; // no +1 !
For completeness (this was discovered by n.m. already, see the other answer): If there is no trailing delimiter in your source string,
while (s[a] != d)
will read beyond your input string (which is undefined behaviour and could result in your program crashing). You need to check for the terminating null character, too:
while(s[a] && s[a] != d)
Finally: how do you want to handle subsequent delimiters? Currently, you will insert empty strings into your array? Print out your strings as follows (with two delimiting symbols - I used * and + like birth and death...):
printf("*%s+", *a);
and you will see. Is this intended?
Edit 2: The variant with pointer arithmetic (only):
char** split_at (char* s, char d)
{
int numdelim = 0;
char* t = s; // need a copy
while(*t)
{
numdelim += *t == d;
++t;
}
char** final = (char**)malloc((numdelim + 2) * sizeof(char*));
char** f = final; // pointer to current position within final
t = s; // re-assign t, using s as start pointer for new strings
while(*t) // see above
{
if(*t == d) // delimiter found!
{
// can subtract pointers --
// as long as they point to the same array!!!
char* n = (char*)malloc(t - s + 1); // +1: terminating null
*f++ = n; // store in position pointer and increment it
while(s != t) // copy the string from start to current t
*n++ = *s++;
*n = 0; // terminate the new string
}
++t; // next character...
}
*f = NULL; // and finally terminate the string array
return final;
}
While I've now been shown a more elegant solution, I've found and rectified the issues in my code:
char** split_at (char* s, char d)
{
int numdelim = 0;
int x;
for (x = 0; s[x] != '\0'; x++)
{
if (s[x] == d)
{
numdelim++;
}
}
int a = 0;
int b = 0;
char** final = (char**)malloc((numdelim+1) * sizeof(char*));
for (int i = 0; i <= numdelim; i++)
{
int sizeofj = 0;
while ((s[a] != d) && (a < x))
{
sizeofj++;
a++;
}
final[i] = (char*)malloc(sizeofj);
a++;
int j = 0;
while (j < sizeofj)
{
final[i][j] = s[b];
j++;
b++;
}
final[i][j] = '\0';
b++;
}
return final;
}
I consolidated what I previously had as a helper function, and modified some points where I incorrectly incremented .

Resources