I have the below program:(functionality: pads white spaces to the right of string,used astreix here for visual ease):
os:windows(visual studio)
#include "stdafx.h"
#include<stdlib.h>
#include<string.h>
#define CBUFFSIZE 48
void right_pad_str(char *pad_str, char *buff,int max_buffsize){
int padstr_len = 0;
int space_len = 0;
char *end_str = NULL;
memset(buff, '\0', max_buffsize);
padstr_len = strlen(pad_str);
space_len = ((max_buffsize - 1) - padstr_len);
strncpy_s(buff, max_buffsize, pad_str, strlen(pad_str));
end_str = buff +padstr_len;
memset((end_str), '*', space_len);
buff[max_buffsize] = '\0';
}
int _tmain(int argc, _TCHAR* argv[]){
char tmpstr[49] = { '\0' };
char *str = "hello_world";
right_pad_str(str, tmpstr, CBUFFSIZE + 1);
return 0;
}
There seems to be an issue at memset when I look at the value post memeset, it looks very incorrect i.e junk why is this?In the end I null terminate the string yet I see junk value and a stack corruption error, not sure what's wrong with my logic.
(I have attached a snapshot of the same)
The unexpected behaviour can be seen in this simpler example:
#include <stdio.h>
#include <string.h>
int main(int argc, char **argv)
{
char buffer[3];
buffer[0] = '\0';
buffer[1] = '\0';
buffer[2] = '\0';
strncpy_s(buffer, 3, "*", 1);
printf("%u\n", (unsigned int)(unsigned char)buffer[2]);
return 0;
}
The output is 254 rather than 0, but only in a debug build. This happens during the call to strncpy_s, which is unexpectedly writing to the destination buffer past the end of the copy, presumably in order to expose bugs such as the one (already pointed out) in your code.
NB: Retired Ninja quite correctly points out (in the comments to the question) that this is described, slightly inaccurately, in the documentation's fine print, which I'd originally overlooked:
The debug versions of these functions first fill the buffer with 0xFD. To disable this behavior, use _CrtSetDebugFillThreshold.
(In fact, in Visual Studio 2010, at least, it fills the buffer with 0xFE.)
#Harry Johnston fine answer explains what went wrong.
To pad a string to its array size, recommend:
1) Dispense with the excessive writing of '\0' (memset(buff, ...);... strncpy_s(buff,...) that are subsequently written with data.
2) Use size_t for indexing arrays and string math. size_t is the right size integer for the job.
3) Watch out for badly form calls like with a pad longer than the target or a call with a size of 0. Could check for NULL pointers too.
void right_pad_str(const char *pad_str, char *buff, size_t buff_size){
if (buff_size > 0) {
size_t pad_size = strlen(pad_str) + 1;
if (pad_size > buff_size) {
pad_size = buff_size;
}
memcpy(buff, pad_str, pad_size - 1);
memset(&buff[pad_size - 1], '*', buff_size - pad_size);
buff[buff_size - 1] = '\0';
}
}
// usage
right_pad_str(str, tmpstr, sizeof tmpstr);
Related
I get this warning with gcc -std=gnu17 -Wall -Werror -Wshadow -O3 test.c:
In function ‘insertString’,
inlined from ‘replaceString’ at test.c:94:5,
inlined from ‘main’ at test.c:110:22:
test.c:69:5: error: ‘strncat’ output may be truncated copying between 0 and 77 bytes from a string of length 80 [-Werror=stringop-truncation]
strncat(source, buffer, STRING_SIZE - 1 - position - strlen(stringToInsert));
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
cc1: all warnings being treated as errors
Removing the do while loop (without any changes to the strncat() statement in question) from main() makes the warning go away.
What does the warning mean and why does it go away?
What changes should I incorporate in the code so that the above gcc command doesn't trigger the warning? The solution cannot simply disable the warning (with fe. #pragma statements). The solution has to use strncat() function.
Not important: This is for learning purposes, please be descriptive. The program solves an exercise 9 from chapter 9 of the book "Programming in C (4th Edition)" by Stephen G. Kochan.
The code:
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#define STRING_SIZE 81
int findString(const char strToSearch[], const char strSought[])
{
int strToSearchLength = strlen(strToSearch);
int strSoughtLength = strlen(strSought);
for (int i = 0; i <= strToSearchLength - 1; ++i)
{
if (strToSearch[i] == strSought[0])
{
int j = 0;
while (strToSearch[i+j] == strSought[j])
{
if (strSought[j+1] == '\0')
{
return i;
}
++j;
}
}
else if (i > strToSearchLength - strSoughtLength - 1)
{
return -1;
}
}
return -1;
}
bool removeString(char source[], const int start, const int nCharsToRemove)
{
int i, sourceLength = strlen(source);
if (start + nCharsToRemove > sourceLength || start < 0 || nCharsToRemove < 0)
{
printf("Error in function removeString(): invalid parameters.\n");
return false;
}
else
{
for (i = start; i < sourceLength; ++i)
{
source[i] = source[i + nCharsToRemove];
}
source[i] = '\0';
return true;
}
}
void insertString(char source[], const char stringToInsert[], const int position)
{
char buffer[STRING_SIZE];
int i = 0;
while (source[position + i] != '\0' && position + i < STRING_SIZE - 1)
{
buffer[i] = source[position + i];
++i;
}
buffer[i] = '\0';
source[position] = '\0';
strncat(source, stringToInsert, STRING_SIZE - 1 - position);
// THE STATEMENT MENTIONED IN THE WARNING:
strncat(source, buffer, STRING_SIZE - 1 - position - strlen(stringToInsert));
}
/* A function to replace the first occurence of the string s1
* inside the source string, if it exists, with the string s2
*/
bool replaceString(char source[], const char s1[], const char s2[])
{
int findString(const char strToSearch[], const char strSought[]);
bool removeString(char source[], const int start, const int nCharsToRemove);
void insertString(char source[], const char stringToInsert[], const int position);
int s1_position;
bool success;
// locate s1 inside source
s1_position = findString(source, s1);
if (s1_position == -1)
return false;
// remove s1 from source
success = removeString(source, s1_position, strlen(s1));
if (! success)
return false;
// insert s2 into source at the proper location
insertString(source, s2, s1_position);
return true;
}
int main(void)
{
char text[STRING_SIZE] = "1 is first*";
// uncommenting the following comment and discarding what follows it makes the warning go away
/*
replaceString(text, "is", "one");
printf("%s\n", text);
*/
bool stillFound;
do
stillFound = replaceString(text, "is", "one");
while (stillFound);
printf("%s\n", text);
return 0;
}
What does the warning mean
That is possible, potentially, that buffer will point to a string with 80 characters, but the length STRING_SIZE - 1 - position - strlen(stringToInsert) will be lower then 80. The warning was created to detect cases where not the whole source buffer would be copied to destination (ie. in a strcat(destination, source) call). It may potentially happen. In such case also the destination buffer will not be zero terminated.
why does it go away?
Different code makes compiler make different decisions. In this case compiler doesn't inline the calls, which most probably affects some static analysis done by the compiler. Adding static or attribute((always_inline)) to functions restores the warning. It's hard to answer "why" exactly - either the answer is too broad or too detailed. I believe inspect gcc sources or RTL output to know more.
What changes should I incorporate in the code so that the above gcc command doesn't trigger the warning?
Do not use strncat. Use memcpy. I think something along:
char *dest = &source[position]; // where we copy to
size_t destfree = STRING_SIZE - 1 - position; // how much space we have there
// helper macro
#define MIN(a, b) ((a)<(b)?(a):(b))
size_t to_copy = MIN(strlen(stringToInsert), destfree);
memcpy(dest, stringToInsert, to_copy);
dest += to_copy;
destfree -= to_copy;
to_copy = MIN(strlen(buffer), destfree);
memcpy(dest, buffer, to_copy);
dest += to_copy;
destfree -= to_copy;
dest[0] = '\0';
I made a program in C that can find two similar or different strings and extract the string between them. This type of program has so many uses, and generally when you use such a program, you have a lot of info, so it needs to be fast. I would like tips on how to make this program as fast and efficient as possible.
I am looking for suggestions that won't make me resort to heavy libraries (such as regex).
The code must:
be able to extract a string between two similar or different strings
find the 1st occurrence of string1
find the 1st occurrence of string2 which occurs AFTER string1
extract the string between string1 and string2
be able to use string arguments of any size
be foolproof to human error and return NULL if such occurs (example, string1 exceeds entire text string length. don't crash in an element error, but gracefully return NULL)
focus on speed and efficiency
Below is my code. I am quite new to C, coming from C++, so I could probably use a few suggestions, especially regarding efficient/proper use of the 'malloc' command:
fast_strbetween.c:
/*
Compile with:
gcc -Wall -O3 fast_strbetween.c -o fast_strbetween
*/
#include <stdio.h> // printf
#include <stdlib.h> // malloc
// inline function if it pleases the compiler gods
inline size_t fast_strlen(char *str)
{
int i; // Cannot return 'i' if inside for loop
for(i = 0; str[i] != '\0'; ++i);
return i;
}
char *fast_strbetween(char *str, char *str1, char *str2)
{
// size_t segfaults when incorrect length strings are entered (due to going below 0), so use int instead for increased robustness
int str0len = fast_strlen(str);
int str1len = fast_strlen(str1);
int str1pos = 0;
int charsfound = 0;
// Find str1
do {
charsfound = 0;
while (str1[charsfound] == str[str1pos + charsfound])
++charsfound;
} while (++str1pos < str0len - str1len && charsfound < str1len);
// '++str1pos' increments past by 1: needs to be set back by one
--str1pos;
// Whole string not found or logical impossibilty
if (charsfound < str1len)
return NULL;
/* Start searching 2 characters after last character found in str1. This will ensure that there will be space, and logical possibility, for the extracted text to exist or not, and allow immediate bail if the latter case; str1 cannot possibly have anything between it if str2 is right next to it!
Example:
str = 'aa'
str1 = 'a'
str2 = 'a'
returned = '' (should be NULL)
Without such preventative, str1 and str2 would would be found and '' would be returned, not NULL. This also saves 1 do/while loop, one check pertaining to returning null, and two additional calculations:
Example, if you didn't add +1 str2pos, you would need to change the code to:
if (charsfound < str2len || str2pos - str1pos - str1len < 1)
return NULL;
It also allows for text to be found between three similar strings—what??? I can feel my brain going fuzzy!
Let this example explain:
str = 'aaa'
str1 = 'a'
str2 = 'a'
result = '' (should be 'a')
Without the aforementioned preventative, the returned string is '', not 'a'; the program takes the first 'a' for str1 and the second 'a' for str2, and tries to return what is between them (nothing).
*/
int str2pos = str1pos + str1len + 1; // the '1' added to str2pos
int str2len = fast_strlen(str2);
// Find str2
do {
charsfound = 0;
while (str2[charsfound] == str[str2pos + charsfound])
++charsfound;
} while (++str2pos < str0len - str2len + 1 && charsfound < str2len);
// Deincrement due to '++str2pos' over-increment
--str2pos;
if (charsfound < str2len)
return NULL;
// Only allocate what is needed
char *strbetween = (char *)malloc(sizeof(char) * str2pos - str1pos - str1len);
unsigned int tmp = 0;
for (unsigned int i = str1pos + str1len; i < str2pos; i++)
strbetween[tmp++] = str[i];
return strbetween;
}
int main() {
char str[30] = { "abaabbbaaaabbabbbaaabbb" };
char str1[10] = { "aaa" };
char str2[10] = { "bbb" };
//Result should be: 'abba'
printf("The string between is: \'%s\'\n", fast_strbetween(str, str1, str2));
// free malloc as we go
for (int i = 10000000; --i;)
free(fast_strbetween(str, str1, str2));
return 0;
}
In order to have some way of measuring progress, I have already timed the code above (extracting a small string 10000000 times):
$ time fast_strbetween
The string between is: 'abba'
0m11.09s real 0m11.09s user 0m00.00s system
Process used 99.3 - 100% CPU according to 'top' command (Linux).
Memory used while running: 3.7Mb
Executable size: 8336 bytes
Ran on a Raspberry Pi 3B+ (4 x 1.4Ghz, Arm 6)
If anyone would like to offer code, tips, pointers... I would appreciate it. I will also implement the changes and give a timed result for your troubles.
Oh, and one thing that I learned is to always de-allocate malloc; I ran the code above (with extra loops), just before posting this. My computer's ram filled up, and the computer froze. Luckily, Stack made a backup draft! Lesson learned!
* EDIT *
Here is the revised code using chqrlie's advice as best I could. Added extra checks for end of string, which ended up costing about a second of time with the tested phrase but can now bail very fast if the first string is not found. Using null or illogical strings should not result in error, hopefully. Lots of notes int the code, where they can be better understood. If I've left anything thing out or done something incorrectly, please let me know guys; it is not intentional.
fast_strbetween2.c:
/*
Compile with:
gcc -Wall -O3 fast_strbetween2.c -o fast_strbetween2
Corrections and additions courtesy of:
https://stackoverflow.com/questions/55308295/extracting-a-string-between-two-similar-or-different-strings-in-c-as-fast-as-p
*/
#include<stdio.h> // printf
#include<stdlib.h> // malloc, free
// Strings now set to 'const'
char * fast_strbetween(const char *str, const char *str1, const char *str2)
{
// string size will now be calculated by the characters picked up
size_t str1pos = 0;
size_t str1chars;
// Find str1
do{
str1chars = 0;
// Will the do/while str1 check for '\0' suffice?
// I haven't seen any issues yet, but not sure.
while(str1[str1chars] == str[str1pos + str1chars] && str1[str1chars] != '\0')
{
//printf("Found str1 char: %i num: %i pos: %i\n", str1[str1chars], str1chars + 1, str1pos);
++str1chars;
}
// Incrementing whilst not in conditional expression tested faster
++str1pos;
/* There are two checks for "str1[str1chars] != '\0'". Trying to find
another efficient way to do it in one. */
}while(str[str1pos] != '\0' && str1[str1chars] != '\0');
--str1pos;
//For testing:
//printf("str1pos: %i str1chars: %i\n", str1pos, str1chars);
// exit if no chars were found or if didn't reach end of str1
if(!str1chars || str1[str1chars] != '\0')
{
//printf("Bailing from str1 result\n");
return '\0';
}
/* Got rid of the '+1' code which didn't allow for '' returns.
I agree with your logic of <tag></tag> returning ''. */
size_t str2pos = str1pos + str1chars;
size_t str2chars;
//printf("Starting pos for str2: %i\n", str1pos + str1chars);
// Find str2
do{
str2chars = 0;
while(str2[str2chars] == str[str2pos + str2chars] && str2[str2chars] != '\0')
{
//printf("Found str2 char: %i num: %i pos: %i \n", str2[str2chars], str2chars + 1, str2pos);
++str2chars;
}
++str2pos;
}while(str[str2pos] != '\0' && str2[str2chars] != '\0');
--str2pos;
//For testing:
//printf("str2pos: %i str2chars: %i\n", str2pos, str2chars);
if(!str2chars || str2[str2chars] != '\0')
{
//printf("Bailing from str2 result!\n");
return '\0';
}
/* Trying to allocate strbetween with malloc. Is this correct? */
char * strbetween = malloc(2);
// Check if malloc succeeded:
if (strbetween == '\0') return '\0';
size_t tmp = 0;
// Grab and store the string between!
for(size_t i = str1pos + str1chars; i < str2pos; ++i)
{
strbetween[tmp] = str[i];
++tmp;
}
return strbetween;
}
int main() {
char str[30] = { "abaabbbaaaabbabbbaaabbb" };
char str1[10] = { "aaa" };
char str2[10] = { "bbb" };
printf("Searching \'%s\' for \'%s\' and \'%s\'\n", str, str1, str2);
printf(" 0123456789\n\n"); // Easily see the elements
printf("The word between is: \'%s\'\n", fast_strbetween(str, str1, str2));
for(int i = 10000000; --i;)
free(fast_strbetween(str, str1, str2));
return 0;
}
** Results **
$ time fast_strbetween2
Searching 'abaabbbaaaabbabbbaaabbb' for 'aaa' and 'bbb'
0123456789
The word between is: 'abba'
0m10.93s real 0m10.93s user 0m00.00s system
Process used 99.0 - 100% CPU according to 'top' command (Linux).
Memory used while running: 1.8Mb
Executable size: 8336 bytes
Ran on a Raspberry Pi 3B+ (4 x 1.4Ghz, Arm 6)
chqrlie's answer
I understand that this is just some example code that shows proper programming practices. Nonetheless, it can make for a decent control in testing.
Please note that I do not know how to deallocate malloc in your code, so it is NOT a fair test. As a result, ram usage builds up, taking 130Mb+ for the process alone. I was still able to run the test for the full 10000000 loops. I will say that I tried deallocating this code the way I did my code (via bringing the function 'simple_strbetween' down into main and deallocating with 'free(strndup(p, q - p));'), and the results weren't much different from not deallocating.
** simple_strbetween.c **
/*
Compile with:
gcc -Wall -O3 simple_strbetween.c -o simple_strbetween
Courtesy of:
https://stackoverflow.com/questions/55308295/extracting-a-string-between-two-similar-or-different-strings-in-c-as-fast-as-p
*/
#include<string.h>
#include<stdio.h>
char *simple_strbetween(const char *str, const char *str1, const char *str2) {
const char *q;
const char *p = strstr(str, str1);
if (p) {
p += strlen(str1);
q = *str2 ? strstr(p, str2) : p + strlen(p);
if (q)
return strndup(p, q - p);
}
return NULL;
}
int main() {
char str[30] = { "abaabbbaaaabbabbbaaabbb" };
char str1[10] = { "aaa" };
char str2[10] = { "bbb" };
printf("Searching \'%s\' for \'%s\' and \'%s\'\n", str, str1, str2);
printf(" 0123456789\n\n"); // Easily see the elements
printf("The word between is: \'%s\'\n", simple_strbetween(str, str1, str2));
for(int i = 10000000; --i;)
simple_strbetween(str, str1, str2);
return 0;
}
$ time simple_strbetween
Searching 'abaabbbaaaabbabbbaaabbb' for 'aaa' and 'bbb'
0123456789
The word between is: 'abba'
0m19.68s real 0m19.34s user 0m00.32s system
Process used 100% CPU according to 'top' command (Linux).
Memory used while running: 130Mb (leak due do my lack of knowledge)
Executable size: 8380 bytes
Ran on a Raspberry Pi 3B+ (4 x 1.4Ghz, Arm 6)
Results for above code ran with this alternate strndup:
char *alt_strndup(const char *s, size_t n)
{
size_t i;
char *p;
for (i = 0; i < n && s[i] != '\0'; i++)
continue;
p = malloc(i + 1);
if (p != NULL) {
memcpy(p, s, i);
p[i] = '\0';
}
return p;
}
$ time simple_strbetween
Searching 'abaabbbaaaabbabbbaaabbb' for 'aaa' and 'bbb'
0123456789
The word between is: 'abba'
0m20.99s real 0m20.54s user 0m00.44s system
I kindly ask that nobody make judgements on the results until the code is properly ran. I will revise the results as soon as it is figured out.
* Edit *
Was able to decrease the time by over 25% (11.93s vs 8.7s). This was done by using pointers to increment the positions, as opposed to size_t. Collecting the return string whilst checking the last string was likely what caused the biggest change. I feel there is still lots of room for improvement. A big loss comes from having to free malloc. If there is a better way, I'd like to know.
fast_strbetween3.c:
/*
gcc -Wall -O3 fast_strbetween.c -o fast_strbetween
*/
#include<stdio.h> // printf
#include<stdlib.h> // malloc, free
char * fast_strbetween(const char *str, const char *str1, const char *str2)
{
const char *sbegin = &str1[0]; // String beginning
const char *spos;
// Find str1
do{
spos = str;
str1 = sbegin;
while(*spos == *str1 && *str1)
{
++spos;
++str1;
}
++str;
}while(*str1 && *spos);
// Nothing found if spos hasn't advanced
if (spos == str)
return NULL;
char *strbetween = malloc(1);
if (!strbetween)
return '\0';
str = spos;
int i = 0;
//char *p = &strbetween[0]; // Alt. for advancing strbetween (slower)
sbegin = &str2[0]; // Recycle sbegin
// Find str2
do{
str2 = sbegin;
spos = str;
while(*spos == *str2 && *str2)
{
++str2;
++spos;
}
//*p = *str;
//++p;
strbetween[i] = *str;
++str;
++i;
}while(*str2 && *spos);
if (spos == str)
return NULL;
//*--p = '\0';
strbetween[i - 1] = '\0';
return strbetween;
}
int main() {
char s[100] = "abaabbbaaaabbabbbaaabbb";
char s1[100] = "aaa";
char s2[100] = "bbb";
printf("\nString: \'%s\'\n", fast_strbetween(s, s1, s2));
for(int i = 10000000; --i; )
free(fast_strbetween(s, s1, s2));
return 0;
}
String: 'abba'
0m08.70s real 0m08.67s user 0m00.01s system
Process used 99.0 - 100% CPU according to 'top' command (Linux).
Memory used while running: 1.8Mb
Executable size: 8336 bytes
Ran on a Raspberry Pi 3B+ (4 x 1.4Ghz, Arm 6)
* Edit *
This doesn't really count as it does not 'return' a value, and therefore is against my own rules, but it does pass a variable through, which is changed and brought back to main. It runs with 1 library and takes 3.6s. Getting rid of malloc was the key.
/*
gcc -Wall -O3 fast_strbetween.c -o fast_strbetween
*/
#include<stdio.h> // printf
unsigned int fast_strbetween(const char *str, const char *str1, const char *str2, char *strbetween)
{
const char *sbegin = &str1[0]; // String beginning
const char *spos;
// Find str1
do{
spos = str;
str1 = sbegin;
while(*spos == *str1 && *str1)
{
++spos;
++str1;
}
++str;
}while(*str1 && *spos);
// Nothing found if spos hasn't advanced
if (spos == str)
{
strbetween[0] = '\0';
return 0;
}
str = spos;
sbegin = &str2[0]; // Recycle sbegin
// Find str2
do{
str2 = sbegin;
spos = str;
while(*spos == *str2 && *str2)
{
++str2;
++spos;
}
*strbetween = *str;
++strbetween;
++str;
}while(*str2 && *spos);
if (spos == str)
{
strbetween[0] = '\0';
return 0;
}
*--strbetween = '\0';
return 1; // Successful (found text)
}
int main() {
char s[100] = "abaabbbaaaabbabbbaaabbb";
char s1[100] = "aaa";
char s2[100] = "bbb";
char sret[100];
fast_strbetween(s, s1, s2, sret);
printf("String: %s\n", sret);
for(int i = 10000000; --i; )
fast_strbetween(s, s1, s2, sret);
return 0;
}
Your code has multiple problems and is probably not as efficient as it should be:
you use types int and unsigned int for indexes into the strings. These types may be smaller than the range of size_t. You should revise your code to use size_t and avoid mixing signed and unsigned types in comparisons.
your functions' string arguments should be declared as const char * as you do not modify the strings and should be able to pass const strings without a warning.
redefining strlen is a bad idea: your version will be slower than the system's optimized, assembly coded and very likely inlined version.
computing the length of str is unnecessary and potentially costly: both str1 and str2 may appear close to the beginning of str, scanning for the end of str will be wasteful.
the while loop inside the first do / while loop is incorrect: while(str1[charsfound] == str[str1pos + charsfound]) charsfound++; may access characters beyond the end of str and str1 as the loop does not stop at the null terminator. If str1 only appears at the end of str, you have undefined behavior.
if str1 is an empty string, you will find it at the end of str instead of at the beginning.
why do you initialize str2pos as int str2pos = str1pos + str1len + 1;? If str2 immediately follows str1 inside str, an empty string should be allocated and returned. Your comment regarding this case is unreadable, you should break such long lines to fit within a typical screen width such as 80 columns. It is debatable whether strbetween("aa", "a", "a") should return "" or NULL. IMHO it should return an allocated empty string, which would be consistent with the expected behavior on strbetween("<name></name>", "<name>", "</name>") or strbetween("''", "'", "'"). Your specification preventing strbetween from returning an empty string produces a counter-intuitive border case.
the second scanning loop has the same problems as the first.
the line char *strbetween = (char *) malloc(sizeof(char) * str2pos - str1pos - str1len); has multiple problems: no cast is necessary in C, if you insist on specifying the element size sizeof(char), which is 1 by definition, you should parenthesize the number of elements, and last but not least, you must allocate one extra element for the null terminator.
You do not test if malloc() succeeded. If it returns NULL, you will have undefined behavior, whereas you should just return NULL.
the copying loop uses a mix of signed and unsigned types, causing potentially counterintuitive behavior on overflow.
you forget to set the null terminator, which is consistent with the allocation size error, but incorrect.
Before you try and optimize code, you must ensure correctness! Your code is too complicated and has multiple flaws. Optimisation is a moot point.
You should first try a very simple implementation using standard C string functions: searching a string inside another one is performed efficiently by strstr.
Here is a simple implementation using strstr and strndup(), which should be available on your system:
#include <string.h>
char *simple_strbetween(const char *str, const char *str1, const char *str2) {
const char *q;
const char *p = strstr(str, str1);
if (p) {
p += strlen(str1);
q = *str2 ? strstr(p, str2) : p + strlen(p);
if (q)
return strndup(p, q - p);
}
return NULL;
}
strndup() is defined in POSIX and is part of the Extensions to the C Library Part II: Dynamic Allocation Functions, ISO/IEC TR 24731-2:2010. If it is not available on your system, it can be redefined as:
#include <stdlib.h>
#include <string.h>
char *strndup(const char *s, size_t n) {
size_t i;
char *p;
for (i = 0; i < n && s[i] != '\0'; i++)
continue;
p = malloc(i + 1);
if (p != NULL) {
memcpy(p, s, i);
p[i] = '\0';
}
return p;
}
To ensure correctness, write a number of test cases, with border cases such as all combinations of empty strings and identical strings.
Once your have thoroughly your strbetween function, you can write a benchmarking framework to test performance. This is not so easy to get reliable performance figures, as you will experience if you try. Remember to configure your compiler to select the appropriate optimisations, -O3 for example.
Only then can you move to the next step: if you are really restricted from using standard C library functions, you may first recode your versions of strstr and strlen and still use the same method. Test this new version both for correctness and for performance.
The redundant parts are the computation of strlen(str1) which must have been determined by strstr when it finds a match. And the scan in strndup() which is unnecessary since no null byte is present between p and q. If you have time to waste, you can try and remove these redundancies at the expense of readability, risking non conformity. I would be surprised if you get any improvement at all on average over a wide variety of test cases. 20% would be remarkable.
I'm probably missing something obvious, but my C is pretty rusty and I'm not having any luck making sense of this. I have a loop where I want to iterate over an array of uint64_t values coming from libdvdnav and then format the values and insert them into a string.
The header for libdvdnav defines the function I'm calling thusly:
uint32_t dvdnav_describe_title_chapters(dvdnav_t *self, int32_t title, uint64_t **times, uint64_t *duration);
Here's how I'm defining the variables used and executing the call (dvdnav and args[0] are defined and initialized elsewhere):
uint64_t *times;
uint64_t duration;
uint32_t times_array_len;
times_array_len = dvdnav_describe_title_chapters(dvdnav, atoi(args[0]), ×, &duration);
The code below seems to work, and compiles & runs w/o error, but of course only the first value in the array is inserted:
int i = 0;
uint64_t a_time = times[0];
while(i < times_array_len){
char time_json[100];
sprintf(time_json, "{\"chapter\":\"%d\",\"time\":\"%u\"},", i, a_time);
strcat(payload, time_json);
i++;
}
If I modify this to select each value in the array it still compiles clean, but throws a segfault at runtime:
int i = 0;
while(i < times_array_len){
char time_json[100];
sprintf(time_json, "{\"chapter\":\"%d\",\"time\":\"%u\"},", i, times[i]);
strcat(payload, time_json);
i++;
}
I thought maybe there was something in one of the array elements that was a problem (a too-large value, unexpected, NULL, etc.) but even if I replace the variable i with a known-reasonable element (say, 0) it still segfaults.
I'm sure there's countless other improvements to be made here (safer allocations, overflow protection, etc.) but the part I'm struggling to decipher is getting those values out of the array an into my formatted string.
How is payload defined? If it is too short to contain the strings then you will get a crash.
You can tackle this in several ways:
1) Since you now the number of json entries will be times_array_len you can allocate the string on heap using malloc with the size 100 * times_array_len - you will never exceed that (again, not sure if it is smart using a fixed length for the json buffer), then strcat should be safe. You can even do direct sprintf calls into the payload buffer dirrectly since you will now how far the offset is by keeping track of the return value of sprintf. Something like this:
#include <stdlib.h>
#include <stdio.h>
int main(int argc, char** argv)
{
__int64 pTimes[] = { 1, 2, 3 ,4};
size_t nTimeCount = sizeof(pTimes) / sizeof(pTimes[0]);
size_t nPayloadOffset = 0;
char* pPayload = (char*)malloc(100 * nTimeCount);
if (pPayload)
{
for (size_t nTimeIndex = 0; nTimeIndex < nTimeCount; ++nTimeIndex)
{
nPayloadOffset += sprintf(&pPayload[nPayloadOffset], "{\"chapter\":\"%d\",\"time\":\"%u\"},", nTimeIndex, pTimes[nTimeIndex]);
}
printf("%s\n", pPayload);
free(pPayload);
}
return EXIT_SUCCESS;
}
2) To avoid running over the 100 character length on a single entry you could be wise and allocate the pPlayoad with an initial size, then calculate the size of each entry and reallocate the pPayload if it becomes too short
3) Use C++ and std::stringstream if C++ is an option:
#include <sstream>
#include <iostream>
int main(int argc, char** argv)
{
__int64 pTimes[] = { 1, 2, 3 ,4};
size_t nTimeCount = sizeof(pTimes) / sizeof(pTimes[0]);
size_t nPayloadOffset = 0;
std::stringstream JsonStream;
for (size_t nTimeIndex = 0; nTimeIndex < nTimeCount; ++nTimeIndex)
{
JsonStream << "{\"chapter\":\"" << nTimeIndex << "\",\"time\":\"" << pTimes[nTimeIndex] << "\"},";
}
std::cout << JsonStream.str() << std::endl;
return EXIT_SUCCESS;
}
I am really stuck with one very simple piece of code.
This program takes argument like ./a.out -t=1,32,45,2 and prints quantity of commas in stdout. But from time to time execution works correctly and and more often throws segmentation fault.
I figured out that problem in this line of function substr_cnt (I also placed corresponding commentaries in code below):
target_counting = (char *)malloc(sizeof(char)*(strlen(target)));
In fact malloc returns NULL. If I change sizeof(char) by sizeof(char *) all starts work like a charm but I can't understand why is that. Furthermore in main function I also use malloc, and even with the same line
arg_parameter = (char *) malloc(sizeof(char)*(strlen(argv[1] - 3)));
all works just fine.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define strindex(target, source) ((size_t) strstr(target, source) - (size_t) target)
int substr_cnt( char *target, char *source ) {
int i=0;
int cnt=0;
char *target_counting;
//this is NOT working
target_counting = (char *)malloc(sizeof(char)*(strlen(target)));
//this is working
//target_counting = (char *)malloc(sizeof(char *)*(strlen(target)));
if (target_counting == NULL) {
printf("malloc failed\n");
return -1;
}
strcpy(target_counting, target);
while ((i=strindex(target_counting, source)) > 0) {
strncpy(target_counting, target_counting + i + 1, strlen(target_counting));
cnt++;
}
free(target_counting);
return cnt;
}
int main( int argc, char *argv[] )
{
int i;
int default_behavior = 0;
int arg_parametr_cnt;
char *arg_parameter;
if (argc == 1) {
default_behavior = 1;
} else if (argv[1][0] == '-' && argv[1][1] == 't' && argv[1][2] == '=') {
//this is working
arg_parameter = (char *) malloc(sizeof(char)*(strlen(argv[1] - 3)));
strncpy(arg_parameter, argv[1]+3, strlen(argv[1]));
printf("%s\n", arg_parameter);
arg_parametr_cnt = substr_cnt(arg_parameter, ",");
printf("commas: %d\n", arg_parametr_cnt);
}
else {
printf("wrong command line");
return 1;
}
return 0;
}
You have several issues here, the main point, you don't need to allocate memory at all. You can implement searching for a given substring without modifying the string and therefore work directly on the given argv parameters, e.g.
int substr_cnt(const char *haystack, const char *needle)
{
int cnt = 0;
const char *found = haystack;
while ((found = strstr(found, needle)) != NULL) {
++found;
++cnt;
}
return cnt;
}
Same for the call in main, just pass argv directly
arg_parametr_cnt = substr_cnt(argv[1] + 3, ",");
Now to answer your question, unless you really see the output of
printf("malloc failed\n");
I don't believe, malloc returns NULL, because when you allocate an even larger amount of memory, sizeof(char*) vs sizeof(char), it works.
The reasons, why your program crashes, are already covered in the other answers. To summarize
target_counting = (char *)malloc(sizeof(char)*(strlen(target))); allocates one char less than it should
while ((i=strindex(target_counting, source)) > 0) I'm not sure, what happens, when the result of strstr is NULL. strindex might return a negative number, depending on your memory layout, but I am not sure.
strncpy(target_counting, target_counting + i + 1, strlen(target_counting)); This is not really an issue, but since you copy the rest of the string, you could use strcpy(target_counting, target_counting + i + 1) instead.
arg_parameter = (char *) malloc(sizeof(char)*(strlen(argv[1] - 3))); this should be malloc(sizeof(char) * strlen(argv[1]) - 3 + 1)
strncpy(arg_parameter, argv[1]+3, strlen(argv[1])); again strcpy(arg_parameter, argv[1]+3) would be sufficient
Update:
In this version
int strindex(char *target, char *source)
{
char *idx;
if ((idx = strstr(target, source)) != NULL) {
return idx - target;
} else {
return -1;
}
}
you have an explicit test for NULL and act accordingly.
In the macro version
#define strindex(target, source) ((size_t) strstr(target, source) - (size_t) target)
there is no such test. You determine the index by calculating the difference between strstr() and the base address target. This is fine so far, but what happens, when strstr() returns NULL?
Pointer arithmetic is defined with two pointers, pointing into the same array. As soon as the two pointers point into different arrays, or one pointing into an array and the other somewhere else, the behaviour is undefined.
Technically, when you calculate NULL - target, it might yield a negative value, but it also might not. If target points to the address of 0x0f0a3a90, you could have 0x0 - 0x0f0a3a90 and get a negative value. If target points to 0xfe830780 however, it might be interpreted as a negative number, and then 0x0 - 0xfe830780 could result in a positive number.
But the main point is, you have undefined behaviour. For further reading look for pointer arithmetic, e.g. C++: Pointer Arithmetic
your malloc is not allocating space for the null terminator, you need to malloc (strlen(string)+1).
The malloc with a char* works because a pointer (is normal) 4 bytes long, so you are allocating 4 times more memory than required - minus the 1 byte need for a null terminator.
The problem may lie here: malloc(sizeof(char)*(strlen(argv[1] - 3)) in main. You are subtracting 3 from argv[1].
I think you intended to use:
malloc(sizeof(char)*(strlen(argv[1]) - 2)); // Allocate one more space for '\0' character
Doing this makes strlen to access unallocated memory.
Your program may not fail here, but later, because it is simply undefined behavior.
There are several buffer overruns, but I think that the bug that makes you program crash is the following:
strncpy(target_counting, target_counting + i + 1, strlen(target_counting));
Note that the strings in strncpy may not overlap!
I suggest that you do a memmove instead, because memmove can handle overlapping buffers:
memmove(target_counting, target_counting + i + 1, strlen(target_counting + i + 1) + 1);
I think your main issue is here :
arg_parameter = (char *) malloc(sizeof(char)*(strlen(argv[1] - 3)));
especially here
strlen(argv[1] - 3)
you pass to strlen address of argv[1]-3 which is not valid address.
actually what you meant is strlen(argv[1]) - 3. As others said you also should add one char for \0 so strlen(argv[1]) - 2
I have problem with my array of char*-
char *original_file_name_list[500];
while(dp=readdir(dir)) != NULL) {
original_file_name = dp->d_name;
original_file_name_list[counter] = original_file_name;
printf("%s\n",original_file_name_list[0]);
printf("%d\n",counter);
counter++;
}
The problem is, that it prints all files fine. It should print only first file, right?
And if I try printf("%s\n",original_file_name_list[1]); It doesn't work , which means that it is writing only in 1st string. Any idea why?
edit: There is no syntax error due to compiler.
You're not copying the string at all - also your file_name_list array hasn't enough space for a list of filenames - just for a list of pointers. But dp->d_name is just a pointer to a char* - you can't know for how long the memory behind the pointer is valid. Because of that you have to make a copy for yourself.
#include <string.h>
#include <dirent.h>
int main(int argc, char** argv){
char original_file_name_list[50][50];
size_t counter = 0;
while(dp=readdir(dir)) != NULL) // does work fine (ordinary reading files from dir)
{
size_t len = strlen(dp->d_name);
if(len >= 50) len = 49;
strncpy(original_file_name_list[counter], dp->d_name, len);
original_file_name_list[counter][len] = '\0';
printf("%d\n",counter);
counter++;
}
printf("%s\n",original_file_name_list[1]); // <- will work if you have at least 2 files in your directory
return 0;
}
I'm not sure about purpose of counter2 (I have replaced it with counter) but I can propose the following code with strdup() call to store the file names:
char *original_file_name_list[500] = {0}; // it is better to init it here
while(dp=readdir(dir)) != NULL) {
original_file_name_list[counter] = strdup(dp->d_name); // strdup() is ok to use
// here, see the comments
printf("%s\n%d\n",original_file_name_list[counter], counter);
counter++;
}
/* some useful code */
/* don't forget to free the items of list (allocated by strdup(..) )*/
for (int i = 0; i < 500; ++i) {
free(original_file_name_list[i]);
}