For this code below that I was writing. I was wondering, if I want to split the string but still retain the original string is this the best method?
Should the caller provided the ** char or should the function "split" make an additional malloc call and memory manage the ** char?
Also, I was wondering if this is the most optimizing method, or could I optimize the code better than this?
I still have not debug the code yet, I am a bit undecided whether if the caller manage the ** char or the function manage the pointer ** char.
#include <stdio.h>
#include <stdlib.h>
size_t split(const char * restrict string, const char splitChar, char ** restrict parts, const size_t maxParts){
size_t size = 100;
size_t partSize = 0;
size_t len = 0;
size_t newPart = 1;
char * tempMem;
/*
* We just reverse a long page of memory
* At reaching the space character that is the boundary of the new
*/
char * mem = (char*) malloc( sizeof(char) * size );
if ( mem == NULL ) return 0;
for ( size_t i = 0; string[i] != 0; i++ ) {
// If it is a split char we at a new part
if ( string[i] == splitChar) {
// If the last character was not the split character
// Then mem[len] = 0 and increase the len by 1.
if (newPart == 0) mem[len++] = 0;
newPart = 1;
continue;
} else {
// If this is a new part
// and not a split character
// we make a new pointer
if ( newPart == 1 ){
// if reach maxpart we break.
// It is okay here, to not worry about memory
if ( partSize == maxParts ) break;
parts[partSize++] = &mem[len];
newPart = 0;
}
mem[len++] = string[i];
if ( len == size ){
// if ran out of memory realloc.
tempMem = (char*)realloc(mem, sizeof(char) * (size << 1) );
// if fail quit loop
if ( tempMem == NULL ) {
// If we can't get more memory the last part could be corrupted
// We have to return.
// Otherwise the code below can seg.
// There maybe a better way than this.
return partSize--;
}
size = size << 1;
mem = tempMem;
}
}
}
// If we got here and still in a newPart that is fine no need
// an additional character.
if ( newPart != 1 ) mem[len++] = 0;
// realloc to give back the unneed memory
if ( len < size ) {
tempMem = (char*) realloc(mem, sizeof(char) * len );
// If the resizing did not fail but yielded a different
// memory block;
if ( tempMem != NULL && tempMem != mem ){
for ( size_t i = 0; i < partSize; i++ ){
parts[i] = tempMem + (parts[i] - mem);
}
}
}
return partSize;
}
int main(){
char * tStr = "This is a super long string just to test the str str adfasfas something split";
char * parts[10];
size_t len = split(tStr, ' ', parts, 10);
for (size_t i = 0; i < len; i++ ){
printf("%zu: %s\n", i, parts[i]);
}
}
What is "best" is very subjective, as well as use case dependent.
I personally would keep the parameters as input only, define a struct to contain the split result, and probably return such by value. The struct would probably contain pointers to memory allocation, so would also create a helper function free that memory. The parts might be stored as list of strings (copy string data) or index&len pairs for the original string (no string copies needed, but original string needs to remain valid).
But there are dozens of very different ways to do this in C, and all a bit klunky. You need to choose your flavor of klunkiness based on your use case.
About being "more optimized": unless you are coding for a very small embedded device or something, always choose a more robust, clear, easier to use, harder to use wrong over more micro-optimized. The useful kind of optimization turns, for example, O(n^2) to O(n log n). Turning O(3n) to O(2n) of a single function is almost always completely irrelevant (you are not going to do string splitting in a game engine inner rendering loop...).
I have a string creating function in C which accepts an array of structs as it's argument and outputs a string based on a predefined format (like a list of list in python).
Here's the function
typedef struct
{
PacketInfo_t PacketInfo;
char Gnss60[1900];
//and other stuff...
} Track_json_t;
typedef struct
{
double latitude;
double longitude;
} GPSPoint_t;
typedef struct
{
UInt16 GPS_StatusCode;
UInt32 fixtime;
GPSPoint_t point;
double altitude;
unsigned char GPS_Satilite_Num;
} GPS_periodic_t;
unsigned short SendTrack()
{
Track_json_t i_sTrack_S;
memset(&i_sTrack_S, 0x00, sizeof(Track_json_t));
getEvent_Track(&i_sTrack_S);
//Many other stuff added to the i_sTrack_S struct...
//Make a JSON format out of it
BuildTrackPacket_json(&i_sTrack_S, XPORT_MODE_GPRS);
}
Track_json_t *getEvent_Track(Track_json_t *trk)
{
GPS_periodic_t l_gps_60Sec[60];
memset(&l_gps_60Sec, 0x00,
sizeof(GPS_periodic_t) * GPS_PERIODIC_ARRAY_SIZE);
getLastMinGPSdata(l_gps_60Sec, o_gps_base);
get_gps60secString(l_gps_60Sec, trk->Gnss60);
return trk;
}
void get_gps60secString(GPS_periodic_t input[60], char *output)
{
int i = 0;
memcpy(output, "[", 1); ///< Copy the first char as [
char temp[31];
for (i = 0; i < 59; i++) { //Run for n-1 elements
memset(temp, 0, sizeof(temp));
snprintf(temp, sizeof(temp), "[%0.8f,%0.8f],",
input[i].point.latitude, input[i].point.longitude);
strncat(output, temp, sizeof(temp));
}
memset(temp, 0, sizeof(temp)); //assign last element
snprintf(temp, sizeof(temp), "[%0.8f,%0.8f]]",
input[i].point.latitude, input[i].point.longitude);
strncat(output, temp, sizeof(temp));
}
So the output of the function must be a string of format
[[12.12345678,12.12345678],[12.12345678,12.12345678],...]
But at times I get a string which looks like
[[12.12345678,12.12345678],[55.01[12.12345678,12.12345678],...]
[[21.28211567,84.13454083],[21.28211533,21.22[21.28211517,84.13454000],..]
Previously, I had a buffer overflow at the function get_gps60secString, I fixed that by using snprintf and strncat.
Note: This is an embedded application and this error occur once or twice a day (out of 1440 packets)
Question
1. Could this be caused by an interrupt during the snprintf/strncat process?
2. Could this be caused by a memory leak, overwriting the stack or some other segmentation issue caused else where?
Basically I would like to understand what might be causing a corrupt string.
Having a hard time finding the cause and fixing this bug.
EDIT:
I used chux's function. Below is the Minimal, Complete, and Verifiable Example
/*
* Test code for SO question https://stackoverflow.com/questions/5216413
* A Minimal, Complete, and Verifiable Example
*/
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <stdbool.h>
#include <signal.h>
#include <unistd.h>
typedef unsigned short UInt16;
typedef unsigned long UInt32;
#define GPS_PERIODIC_ARRAY_SIZE 60
#define GPS_STRING_SIZE 1900
/* ---------------------- Data Structs --------------------------*/
typedef struct
{
char Gnss60[GPS_STRING_SIZE];
} Track_json_t;
typedef struct
{
double latitude;
double longitude;
} GPSPoint_t;
typedef struct
{
UInt16 GPS_StatusCode;
UInt32 fixtime;
GPSPoint_t point;
double altitude;
unsigned char GPS_Satilite_Num;
} GPS_periodic_t;
/* ----------------------- Global --------------------------------*/
FILE *fptr; //Global file pointer
int res = 0;
int g_last = 0;
GPS_periodic_t l_gps_60Sec[GPS_PERIODIC_ARRAY_SIZE];
/* ----------------------- Function defs --------------------------*/
/* At signal interrupt this function is called.
* Flush and close the file. And safly exit the program */
void userSignalInterrupt()
{
fflush(fptr);
fclose(fptr);
res = 1;
exit(0);
}
/* #brief From the array of GPS structs we create a string of the format
* [[lat,long],[lat,long],..]
* #param input The input array of GPS structs
* #param output The output string which will contain lat, long
* #param sz Size left in the output buffer
* #return 0 Successfully completed operation
* 1 Failed / Error
*/
int get_gps60secString(GPS_periodic_t input[GPS_PERIODIC_ARRAY_SIZE],
char *output, size_t sz)
{
int cnt = snprintf(output, sz, "[");
if (cnt < 0 || cnt >= sz)
return 1;
output += cnt;
sz -= cnt;
int i = 0;
for (i = 0; i < GPS_PERIODIC_ARRAY_SIZE; i++) {
cnt = snprintf(output, sz, "[%0.8f,%0.8f]%s",
input[i].point.latitude, input[i].point.longitude,
i + 1 == GPS_PERIODIC_ARRAY_SIZE ? "" : ",");
if (cnt < 0 || cnt >= sz)
return 1;
output += cnt;
sz -= cnt;
}
cnt = snprintf(output, sz, "]");
if (cnt < 0 || cnt >= sz)
return 1;
return 0; // no error
}
/* #brief Create a GPS struct with data for testing. It will populate the
* point field of GPS_periodic_t. Lat starts from 0.0 and increases by 1*10^(-8)
* and Long will dstart at 99.99999999 and dec by 1*10^(-8)
*
* #param o_gps_60sec Output array of GPS structs
*/
void getLastMinGPSdata(GPS_periodic_t *o_gps_60sec)
{
//Fill in GPS related data here
int i = 0;
double latitude = o_gps_60sec[0].point.latitude;
double longitude = o_gps_60sec[0].point.longitude;
for (i = 0; i < 60; i++)
{
o_gps_60sec[i].point.latitude = latitude + (0.00000001 * (float)g_last +
0.00000001 * (float)i);
o_gps_60sec[i].point.longitude = longitude - (0.00000001 * (float)g_last +
0.00000001 * (float)i);
}
g_last = 60;
}
/* #brief Get the GPS data and convert it into a string
* #param trk Track structure with GPS string
*/
int getEvent_Track(Track_json_t *trk)
{
getLastMinGPSdata(l_gps_60Sec);
get_gps60secString(l_gps_60Sec, trk->Gnss60, GPS_STRING_SIZE);
return 0;
}
int main()
{
fptr = fopen("gpsAno.txt", "a");
if (fptr == NULL) {
printf("Error!!\n");
exit(1);
}
//Quit at signal interrupt
signal(SIGINT, userSignalInterrupt);
Track_json_t trk;
memset(&l_gps_60Sec, 0x00, sizeof(GPS_periodic_t) * GPS_PERIODIC_ARRAY_SIZE);
//Init Points to be zero and 99.99999999
int i = 0;
for (i = 0; i < 60; i++) {
l_gps_60Sec[i].point.latitude = 00.00000000;
l_gps_60Sec[i].point.longitude = 99.99999999;
}
do {
memset(&trk, 0, sizeof(Track_json_t));
getEvent_Track(&trk);
//Write to file
fprintf(fptr, "%s", trk.Gnss60);
fflush(fptr);
sleep(1);
} while (res == 0);
//close and exit
fclose(fptr);
return 0;
}
Note: Error was not recreated in the above code.
Because this doesn't have the strcat pitfalls.
I tested this function in the embedded application.
Through this I was able to find that the snprintf returns an error and the string created ended up to be:
[17.42401750,78.46098717],[17.42402083,53.62
It ended there (because of the return 1).
Does this mean that the data which was passed to snprints corrupted? It's a float value. How can it get corrupted?
Solution
The error have not been seen since I changed the sprintf function with one that doesn't directly deal with 64 bits of data.
Here's the function modp_dtoa2
/** \brief convert a floating point number to char buffer with a
* variable-precision format, and no trailing zeros
*
* This is similar to "%.[0-9]f" in the printf style, except it will
* NOT include trailing zeros after the decimal point. This type
* of format oddly does not exists with printf.
*
* If the input value is greater than 1<<31, then the output format
* will be switched exponential format.
*
* \param[in] value
* \param[out] buf The allocated output buffer. Should be 32 chars or more.
* \param[in] precision Number of digits to the right of the decimal point.
* Can only be 0-9.
*/
void modp_dtoa2(double value, char* str, int prec)
{
/* if input is larger than thres_max, revert to exponential */
const double thres_max = (double)(0x7FFFFFFF);
int count;
double diff = 0.0;
char* wstr = str;
int neg= 0;
int whole;
double tmp;
uint32_t frac;
/* Hacky test for NaN
* under -fast-math this won't work, but then you also won't
* have correct nan values anyways. The alternative is
* to link with libmath (bad) or hack IEEE double bits (bad)
*/
if (! (value == value)) {
str[0] = 'n'; str[1] = 'a'; str[2] = 'n'; str[3] = '\0';
return;
}
if (prec < 0) {
prec = 0;
} else if (prec > 9) {
/* precision of >= 10 can lead to overflow errors */
prec = 9;
}
/* we'll work in positive values and deal with the
negative sign issue later */
if (value < 0) {
neg = 1;
value = -value;
}
whole = (int) value;
tmp = (value - whole) * pow10[prec];
frac = (uint32_t)(tmp);
diff = tmp - frac;
if (diff > 0.5) {
++frac;
/* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */
if (frac >= pow10[prec]) {
frac = 0;
++whole;
}
} else if (diff == 0.5 && ((frac == 0) || (frac & 1))) {
/* if halfway, round up if odd, OR
if last digit is 0. That last part is strange */
++frac;
}
/* for very large numbers switch back to native sprintf for exponentials.
anyone want to write code to replace this? */
/*
normal printf behavior is to print EVERY whole number digit
which can be 100s of characters overflowing your buffers == bad
*/
if (value > thres_max) {
sprintf(str, "%e", neg ? -value : value);
return;
}
if (prec == 0) {
diff = value - whole;
if (diff > 0.5) {
/* greater than 0.5, round up, e.g. 1.6 -> 2 */
++whole;
} else if (diff == 0.5 && (whole & 1)) {
/* exactly 0.5 and ODD, then round up */
/* 1.5 -> 2, but 2.5 -> 2 */
++whole;
}
//vvvvvvvvvvvvvvvvvvv Diff from modp_dto2
} else if (frac) {
count = prec;
// now do fractional part, as an unsigned number
// we know it is not 0 but we can have leading zeros, these
// should be removed
while (!(frac % 10)) {
--count;
frac /= 10;
}
//^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2
// now do fractional part, as an unsigned number
do {
--count;
*wstr++ = (char)(48 + (frac % 10));
} while (frac /= 10);
// add extra 0s
while (count-- > 0) *wstr++ = '0';
// add decimal
*wstr++ = '.';
}
// do whole part
// Take care of sign
// Conversion. Number is reversed.
do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10);
if (neg) {
*wstr++ = '-';
}
*wstr='\0';
strreverse(str, wstr-1);
}
Here's (part of) my unabashedly opinionated guide on safe string handling in C. Normally, I would promote dynamic memory allocation instead of fixed-length strings, but in this case I'm assuming that in the embedded environment that might be problematic. (Although assumptions like that should always be checked.)
So, first things first:
Any function which creates a string in a buffer must be told explicitly how long the buffer is. This is non-negotiable.
As should be obvious, it's impossible for a function filling a buffer to check for buffer overflow unless it knows where the buffer ends. "Hope that the buffer is long enough" is not a viable strategy. "Document the needed buffer length" would be fine if everyone carefully read the documentation (they don't) and if the required length never changes (it will). The only thing that's left is an extra argument, which should be of type size_t (because that's the type of buffer lengths in the C library functions which require lengths).
Forget that strncpy and strncat exist. Also forget about strcat. They are not your friends.
strncpy is designed for a specific use case: ensuring that an entire fixed-length buffer is initialised. It is not designed for normal strings, and since it doesn't guarantee that the output is NUL-terminated, it doesn't produce a string.
If you're going to NUL-terminate yourself anyway, you might as well use memmove, or memcpy if you know that the source and destination don't overlap, which should almost always be the case. Since you'll want the memmove to stop at the end of the string for short strings (which strncpy does not do), measure the string length first with strnlen: strnlen takes a maximum length, which is precisely what you want in the case that you are going move a maximum number of characters.
Sample code:
/* Safely copy src to dst where dst has capacity dstlen. */
if (dstlen) {
/* Adjust to_move will have maximum value dstlen - 1 */
size_t to_move = strnlen(src, dstlen - 1);
/* copy the characters */
memmove(dst, src, to_move);
/* NUL-terminate the string */
dst[to_move] = 0;
}
strncat has a slightly more sensible semantic, but it's practically never useful because in order to use it, you already have to know how many bytes you could copy. In order to know that, in practice, you need to know how much space is left in your output buffer, and to know that you need to know where in the output buffer the copy will start. [Note 1]. But if you already know where the copy will start, what's the point of searching through the buffer from the beginning to find the copy point? And if you do let strncat do the search, how sure are you that your previously computed start point is correct?
In the above code snippet, we already computed the length of the copy. We can extend that to do an append without rescanning:
/* Safely copy src1 and then src2 to dst where dst has capacity dstlen. */
/* Assumes that src1 and src2 are not contained in dst. */
if (dstlen) {
/* Adjust to_move will have maximum value dstlen - 1 */
size_t to_move = strnlen(src1, dstlen - 1);
/* Copy the characters from src1 */
memcpy(dst, src1, to_move);
/* Adjust the output pointer and length */
dst += to_move;
dstlen -= to_move;
/* Now safely copy src2 to just after src1. */
to_move = strnlen(src2, dstlen - 1);
memcpy(dst, src2, to_move);
/* NUL-terminate the string */
dst[to_move] = 0;
}
It might be that we want the original values of dst and dstlen after creating the string, and it might also be that we want to know how many bytes we inserted into dst in all. In that case, we would probably want to make copies of those variables before doing the copies, and save the cumulative sum of moves.
The above assumes that we're starting with an empty output buffer, but perhaps that isn't the case. Since we still need to know where the copy will start in order to know how many characters we can put at the end, we can still use memcpy; we just need to scan the output buffer first to find the copy point. (Only do this if there is no alternative. Doing it in a loop instead of recording the next copy point is Shlemiel the Painter's algorithm.)
/* Safely append src to dst where dst has capacity dstlen and starts
* with a string of unknown length.
*/
if (dstlen) {
/* The following code will "work" even if the existing string
* is not correctly NUL-terminated; the code will not copy anything
* from src, but it will put a NUL terminator at the end of the
* output buffer.
*/
/* Figure out where the existing string ends. */
size_t prefixlen = strnlen(dst, dstlen - 1);
/* Update dst and dstlen */
dst += prefixlen;
dstlen -= prefixlen;
/* Proceed with the append, as above. */
size_t to_move = strnlen(src, dstlen - 1);
memmove(dst, src, to_move);
dst[to_move] = 0;
}
Embrace snprintf. It really is your friend. But always check its return value.
Using memmove, as above, is slightly awkward. It requires you to manually check that the buffer's length is not zero (otherwise subtracting one would be disastrous since the length is unsigned), and it requires you to manually NUL-terminate the output buffer, which is easy to forget and the source of many bugs. It is very efficient, but sometimes it's worth sacrificing a little efficiency so that your code is easier to write and easier to read and verify.
And that leads us directly to snprintf. For example, you can replace:
if (dstlen) {
size_t to_move = strnlen(src, dstlen - 1);
memcpy(dst, src, to_move);
dst[to_move] = 0;
}
with the much simpler
int copylen = snprintf(dst, dstlen, "%s", src);
That does everything: checks that dstlen is not 0; only copies the characters from src which can fit in dst, and correctly NUL-terminates dst (unless dstlen was 0). And the cost is minimal; it takes very little time to parse the format string "%s" and most implementations are pretty well optimised for this case. [Note 2]
But snprintf is not a panacea. There are still a couple of really important warnings.
First, the documentation for snprintf makes clear that it is not permitted for any input argument to overlap the output range. (So it replaces memcpy but not memmove.) Remember that overlap includes NUL-terminators, so the following code which attempts to double the string in str instead leads to Undefined Behaviour:
char str[BUFLEN];
/* Put something into str */
get_some_data(str, BUFLEN);
/* DO NOT DO THIS: input overlaps output */
int result = snprintf(str, BUFLEN, "%s%s", str, str);
/* DO NOT DO THIS EITHER; IT IS STILL UB */
size_t len = strnlen(str, cap - 1);
int result = snprintf(str + len, cap - len, "%s", str);
The problem with the second invocation of snprintf is that the NUL which terminates str is precisely at str + len, the first byte of the output buffer. That's an overlap, so it's illegal.
The second important note about snprintf is that it returns a value, which must not be ignored. The value returned is not the length of the string created by snprintf. It's the length the string would have been had it not been truncated to fit in the output buffer.
If no truncation occurred, then the result is the length of the result, which must be strictly less than the size of the output buffer (because there must be room for a NUL terminator, which is not considered part of the length of the result.) You can use this fact to check whether truncation occurred:
if (result >= dstlen) /* Output was truncated */
This can be used, for example, to redo the snprintf with a larger, dynamically-allocated buffer (of size result + 1; never forget the need to NUL-terminate).
But remember that the result is an int -- that is, a signed value. That means that snprintf cannot cope with very long strings. That's not likely to be an issue in embedded code, but on systems where it's conceivable that strings exceed 2GB, you may not be able to safely use %s formats in snprintf. It also means that snprintf is allowed to return a negative value to indicate an error. Very old implementations of snprintf returned -1 to indicate truncation, or in response to being called with buffer length 0. That's not standard behaviour according to C99 (nor recent versions of Posix), but you should be prepared for it.
Standard-compliant implementations of snprintf will return a negative value if the buffer length argument is too big to fit in a (signed) int; it's not clear to me what the expected return value is if the buffer length is OK but the untruncated length is too big for an int. A negative value will also be returned if you used a conversion which resulted in an encoding error; for example, a %lc conversion whose corresponding argument contains an integer which cannot be converted to a multibyte (typically UTF-8) sequence.
In short, you should always check the return value of snprintf (recent gcc/glibc versions will produce a warning if you do not), and you should be prepared for it to be negative.
So, with all that behind us, let's write a function which produces a string of co-ordinate pairs:
/* Arguments:
* buf the output buffer.
* buflen the capacity of buf (including room for trailing NUL).
* points a vector of struct Point pairs.
* npoints the number of objects in points.
* Description:
* buf is overwritten with a comma-separated list of points enclosed in
* square brackets. Each point is output as a comma-separated pair of
* decimal floating point numbers enclosed in square brackets. No more
* than buflen - 1 characters are written. Unless buflen is 0, a NUL is
* written following the (possibly-truncated) output.
* Return value:
* If the output buffer contains the full output, the number of characters
* written to the output buffer, not including the NUL terminator.
* If the output was truncated, (size_t)(-1) is returned.
*/
size_t sprint_points(char* buf, size_t buflen,
struct Point const* points, size_t npoints)
{
if (buflen == 0) return (size_t)(-1);
size_t avail = buflen;
char delim = '['
while (npoints) {
int res = snprintf(buf, avail, "%c[%f,%f]",
delim, points->lat, points->lon);
if (res < 0 || res >= avail) return (size_t)(-1);
buf += res; avail -= res;
++points; --npoints;
delim = ',';
}
if (avail <= 1) return (size_t)(-1);
strcpy(buf, "]");
return buflen - (avail - 1);
}
Notes
You will often see code like this:
strncat(dst, src, sizeof(src)); /* NEVER EVER DO THIS! */
Telling strncat not to append more characters from src than can fit in src is obviously pointless (unless src is not correctly NUL-terminated, in which case you have a bigger problem). More importantly, it does absolutely nothing to protect you from writing beyond the end of the output buffer, since you have not done anything to check that dst has room for all those characters. So about all it does is get rid of compiler warnings about the unsafety of strcat. Since this code is exactly as unsafe as strcat was, you probably would be better off with the warning.
You might even find a compiler which understands snprintf will enough to parse the format string at compile time, so the convenience comes at no cost at all. (And if your current compiler doesn't do this, no doubt a future version will.) As with any use of the *printf family, you should never try to economize keystrokes by
leaving out the format string (snprintf(dst, dstlen, src) instead of snprintf(dst, dstlen, "%s", src).) That's unsafe (it has undefined behaviour if src contains an unduplicated %). And it's much slower because the library function has to parse the entire string to be copied looking for percent signs, instead of just copying it to the output.
Code is using functions that expect pointers to string, yet not always passing pointers to strings as arguments.
Stray characters seen at output of snprintf
A string must have a terminating null character.
strncat(char *, .... expects the first parameter to be a pointer to a string. memcpy(output, "[",1); does not insure that. #Jeremy
memcpy(output, "[",1);
...
strncat(output, temp,sizeof(temp));
This is a candidate source of stray characters.
strncat(...., ..., size_t size). itself is a problem as the size is the amount of space available for concatenating (minus the null character). The size available to char * output is not passed in. #Jonathan Leffler. Might as well do strcat() here.
Instead, pass in the size available to output to prevent buffer overflow.
#define N 60
int get_gps60secString(GPS_periodic_t input[N], char *output, size_t sz) {
int cnt = snprintf(output, sz, "[");
if (cnt < 0 || cnt >= sz)
return 1;
output += cnt;
sz -= cnt;
int i = 0;
for (i = 0; i < N; i++) {
cnt = snprintf(output, size, "[%0.8f,%0.8f]%s", input[i].point.latitude,
input[i].point.longitude, i + 1 == N ? "" : ",");
if (cnt < 0 || cnt >= sz)
return 1;
output += cnt;
sz -= cnt;
}
cnt = snprintf(output, sz, "]");
if (cnt < 0 || cnt >= sz)
return 1;
return 0; // no error
}
OP has posted more code - will review.
Apparently the buffer char *output is pre-filled with 0 before the get_gps60secString() so the missing null character from memcpy(output, "[",1); should not cause the issue - hmmmmmm
unsigned short SendTrack() does not return a value. 1) Using its result value is UB. 2) Enable all compiler warnings.
I'm making a raytracing engine in C using the minilibX library.
I want to be able to read in a .conf file the configuration for the scene to display:
For example:
(Az#Az 117)cat universe.conf
#randomcomment
obj:eye:x:y:z
light:sun:100
light:moon:test
The number of objects can vary between 1 and the infinite.
From now on, I'm reading the file, copying each line 1 by 1 in a char **tab, and mallocing by the number of objects found, like this:
void open_file(int fd, struct s_img *m)
{
int i;
char *s;
int curs_obj;
int curs_light;
i = 0;
curs_light = 0;
curs_obj = 0;
while (s = get_next_line(fd))
{
i = i + 1;
if (s[0] == 'l')
{
m->lights[curs_light] = s;
curs_light = curs_light + 1;
}
else if (s[0] == 'o')
{
m->objs[curs_obj] = s;
curs_obj = curs_obj + 1;
}
else if (s[0] != '#')
{
show_error(i, s);
stop_parsing(m);
}
}
Now, I want to be able to store each information of each tab[i] in a new char **tab, 1 for each object, using the ':' as a separation.
So I need to initialize and malloc an undetermined number of char **tab. How can I do that?
(Ps: I hope my code and my english are good enough for you to understand. And I'm using only the very basic function, like read, write, open, malloc... and I'm re-building everything else, like printf, get_line, and so on)
You can't allocate an indeterminate amount of memory; malloc doesn't support it. What you can do is to allocate enough memory for now and revise that later:
size_t buffer = 10;
char **tab = malloc(buffer);
//...
if (indexOfObjectToCreate > buffer) {
buffer *= 2;
tab = realloc(tab, buffer);
}
I'd use an alternative approach (as this is c, not c++) and allocate simply large buffers as we go by:
char *my_malloc(size_t n) {
static size_t space_left = 0;
static char *base = NULL;
if (base==NULL || space_left < n) base=malloc(space_left=BIG_N);
base +=n; return base-n;
}
Disclaimer: I've omitted the garbage collection stuff and testing return values and all safety measures to keep the routine short.
Another way to think this is to read the file in to a large enough mallocated array (you can check it with ftell), scan the buffer, replace delimiters, line feeds etc. with ascii zero characters and remember the starting locations of keywords.
I am currently programming in C to find the complexity of functions in a program based on the number of lines in the functions. I will have to fopen an existing C file and proceed with the calculation. I know that there maybe some builtin tools for finding it. But still I want it to be programmed manually. Is there any specific method to find the start and end of the various functions in a C file?
Run this through C preprocessor. This way you strip comments, unroll macros, include #includes etc. Unless you want complexity of the user-readable code, this will produce results much more true.
Remove fixed strings. Anything between "" goes, note escaped quote \" doesn't close the string.
Scan the file. First { increases count of functions and begins scanning the body of a function. Observe depth. { increases depth, } decreases, as depth reaches 0 another } is the end of the function. Next { will be a new function, but as you scan the outside, if before reaching next { or EOF you encounter a ; - cancel any data collected on the last piece. That wasn't a function, it was a struct, an union or something like that.
I would recommend a 2-pass approach.
Pass 1: Remove any open or close braces inside comments (and optionally those in preprocessor directives).
Pass 2: Count open and close braces and whenever they match up (#open == #close) a function ends. The next open brace denotes the start of a new function.
This approach is not fail-safe. It may fail if the code contains preprocessor statements that violate good programming practice. If you encounter such code you may want to run your tool on the code after it has passed through the preprocessor stage.
I finally found a nice way to do this!
doxygen already does a lot of things to process functions and other things nicely.
generate doxygen conf like doxygen -g doxygen_conf
open the conf file with your favorite editor and set GENERATE_XML = YES. You might also wanna set RECURSIVE = YES and others needed for your project, and run doxygen. set also INPUT = [PATH_TO_PROJECT_BASE].
In your doxygen build directory, you will find html/ and xml/.
cd80#cd80 ~/lab/VulnVizOnLinux/linux-5.4.109 » cd build_doc
cd80#cd80 ~/lab/VulnVizOnLinux/linux-5.4.109/build_doc » ls
ExtractFunctions.ipynb html xml
cd80#cd80 ~/lab/VulnVizOnLinux/linux-5.4.109/build_doc »
(ignore ExtractFunctions.ipynb, that's mine)
cd to xml and open any of xml files and analyze it for a while.
Here's how I did it.
import os
import xml.etree.ElementTree as ET
base_path = '/home/cd80/lab/VulnVizOnLinux/linux-5.4.109/'
open_files = {}
doc = ET.parse('/home/cd80/lab/VulnVizOnLinux/linux-5.4.109/build_doc/xml/4_2kernel_2module-plts_8c.xml')
root = doc.getroot()
for func in root.findall(".//memberdef/[#kind='function']"):
name = func.find('./name').text
location = func.find('./location')
if 'bodyend' not in location.keys():
continue # this memberdef is not a definition of function
bodystart = int(location.attrib.get('bodystart'))
bodyend = int(location.attrib.get('bodyend'))
file_path = location.attrib.get('file')
file_path = os.path.join(base_path, file_path)
if file_path not in open_files.keys():
with open(file_path, 'rb') as f:
code = f.read().decode('utf-8')
open_files[file_path] = code
else:
code = open_files[file_path]
func_def = '\n'.join(code.split("\n")[bodystart-1:bodyend])
print(func_def)
print('='*30)
Result:
static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc,
enum aarch64_insn_register reg)
{
u32 adrp, add;
adrp = aarch64_insn_gen_adr(pc, dst, reg, AARCH64_INSN_ADR_TYPE_ADRP);
add = aarch64_insn_gen_add_sub_imm(reg, reg, dst % SZ_4K,
AARCH64_INSN_VARIANT_64BIT,
AARCH64_INSN_ADSB_ADD);
return (struct plt_entry){ cpu_to_le32(adrp), cpu_to_le32(add) };
}
==============================
struct plt_entry get_plt_entry(u64 dst, void *pc)
{
struct plt_entry plt;
static u32 br;
if (!br)
br = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_16,
AARCH64_INSN_BRANCH_NOLINK);
plt = __get_adrp_add_pair(dst, (u64)pc, AARCH64_INSN_REG_16);
plt.br = cpu_to_le32(br);
return plt;
}
==============================
bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b)
{
u64 p, q;
/*
* Check whether both entries refer to the same target:
* do the cheapest checks first.
* If the 'add' or 'br' opcodes are different, then the target
* cannot be the same.
*/
if (a->add != b->add || a->br != b->br)
return false;
p = ALIGN_DOWN((u64)a, SZ_4K);
q = ALIGN_DOWN((u64)b, SZ_4K);
/*
* If the 'adrp' opcodes are the same then we just need to check
* that they refer to the same 4k region.
*/
if (a->adrp == b->adrp && p == q)
return true;
return (p + aarch64_insn_adrp_get_offset(le32_to_cpu(a->adrp))) ==
(q + aarch64_insn_adrp_get_offset(le32_to_cpu(b->adrp)));
}
==============================
static bool in_init(const struct module *mod, void *loc)
{
return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
}
==============================
u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
void *loc, const Elf64_Rela *rela,
Elf64_Sym *sym)
{
struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
&mod->arch.init;
struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
int i = pltsec->plt_num_entries;
int j = i - 1;
u64 val = sym->st_value + rela->r_addend;
if (is_forbidden_offset_for_adrp(&plt[i].adrp))
i++;
plt[i] = get_plt_entry(val, &plt[i]);
/*
* Check if the entry we just created is a duplicate. Given that the
* relocations are sorted, this will be the last entry we allocated.
* (if one exists).
*/
if (j >= 0 && plt_entries_equal(plt + i, plt + j))
return (u64)&plt[j];
pltsec->plt_num_entries += i - j;
if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries))
return 0;
return (u64)&plt[i];
}
==============================
static int cmp_rela(const void *a, const void *b)
{
const Elf64_Rela *x = a, *y = b;
int i;
/* sort by type, symbol index and addend */
i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info));
if (i == 0)
i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info));
if (i == 0)
i = cmp_3way(x->r_addend, y->r_addend);
return i;
}
==============================
static bool duplicate_rel(const Elf64_Rela *rela, int num)
{
/*
* Entries are sorted by type, symbol index and addend. That means
* that, if a duplicate entry exists, it must be in the preceding
* slot.
*/
return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0;
}
==============================
static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
Elf64_Word dstidx, Elf_Shdr *dstsec)
{
unsigned int ret = 0;
Elf64_Sym *s;
int i;
for (i = 0; i < num; i++) {
u64 min_align;
switch (ELF64_R_TYPE(rela[i].r_info)) {
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
break;
/*
* We only have to consider branch targets that resolve
* to symbols that are defined in a different section.
* This is not simply a heuristic, it is a fundamental
* limitation, since there is no guaranteed way to emit
* PLT entries sufficiently close to the branch if the
* section size exceeds the range of a branch
* instruction. So ignore relocations against defined
* symbols if they live in the same section as the
* relocation target.
*/
s = syms + ELF64_R_SYM(rela[i].r_info);
if (s->st_shndx == dstidx)
break;
/*
* Jump relocations with non-zero addends against
* undefined symbols are supported by the ELF spec, but
* do not occur in practice (e.g., 'jump n bytes past
* the entry point of undefined function symbol f').
* So we need to support them, but there is no need to
* take them into consideration when trying to optimize
* this code. So let's only check for duplicates when
* the addend is zero: this allows us to record the PLT
* entry address in the symbol table itself, rather than
* having to search the list for duplicates each time we
* emit one.
*/
if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
ret++;
break;
case R_AARCH64_ADR_PREL_PG_HI21_NC:
case R_AARCH64_ADR_PREL_PG_HI21:
if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) ||
!cpus_have_const_cap(ARM64_WORKAROUND_843419))
break;
/*
* Determine the minimal safe alignment for this ADRP
* instruction: the section alignment at which it is
* guaranteed not to appear at a vulnerable offset.
*
* This comes down to finding the least significant zero
* bit in bits [11:3] of the section offset, and
* increasing the section's alignment so that the
* resulting address of this instruction is guaranteed
* to equal the offset in that particular bit (as well
* as all less signficant bits). This ensures that the
* address modulo 4 KB != 0xfff8 or 0xfffc (which would
* have all ones in bits [11:3])
*/
min_align = 2ULL << ffz(rela[i].r_offset | 0x7);
/*
* Allocate veneer space for each ADRP that may appear
* at a vulnerable offset nonetheless. At relocation
* time, some of these will remain unused since some
* ADRP instructions can be patched to ADR instructions
* instead.
*/
if (min_align > SZ_4K)
ret++;
else
dstsec->sh_addralign = max(dstsec->sh_addralign,
min_align);
break;
}
}
if (IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
cpus_have_const_cap(ARM64_WORKAROUND_843419))
/*
* Add some slack so we can skip PLT slots that may trigger
* the erratum due to the placement of the ADRP instruction.
*/
ret += DIV_ROUND_UP(ret, (SZ_4K / sizeof(struct plt_entry)));
return ret;
}
==============================
int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
char *secstrings, struct module *mod)
{
unsigned long core_plts = 0;
unsigned long init_plts = 0;
Elf64_Sym *syms = NULL;
Elf_Shdr *pltsec, *tramp = NULL;
int i;
/*
* Find the empty .plt section so we can expand it to store the PLT
* entries. Record the symtab address as well.
*/
for (i = 0; i < ehdr->e_shnum; i++) {
if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
mod->arch.core.plt_shndx = i;
else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
mod->arch.init.plt_shndx = i;
else if (!strcmp(secstrings + sechdrs[i].sh_name,
".text.ftrace_trampoline"))
tramp = sechdrs + i;
else if (sechdrs[i].sh_type == SHT_SYMTAB)
syms = (Elf64_Sym *)sechdrs[i].sh_addr;
}
if (!mod->arch.core.plt_shndx || !mod->arch.init.plt_shndx) {
pr_err("%s: module PLT section(s) missing\n", mod->name);
return -ENOEXEC;
}
if (!syms) {
pr_err("%s: module symtab section missing\n", mod->name);
return -ENOEXEC;
}
for (i = 0; i < ehdr->e_shnum; i++) {
Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
if (sechdrs[i].sh_type != SHT_RELA)
continue;
/* ignore relocations that operate on non-exec sections */
if (!(dstsec->sh_flags & SHF_EXECINSTR))
continue;
/* sort by type, symbol index and addend */
sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
if (!str_has_prefix(secstrings + dstsec->sh_name, ".init"))
core_plts += count_plts(syms, rels, numrels,
sechdrs[i].sh_info, dstsec);
else
init_plts += count_plts(syms, rels, numrels,
sechdrs[i].sh_info, dstsec);
}
pltsec = sechdrs + mod->arch.core.plt_shndx;
pltsec->sh_type = SHT_NOBITS;
pltsec->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
pltsec->sh_addralign = L1_CACHE_BYTES;
pltsec->sh_size = (core_plts + 1) * sizeof(struct plt_entry);
mod->arch.core.plt_num_entries = 0;
mod->arch.core.plt_max_entries = core_plts;
pltsec = sechdrs + mod->arch.init.plt_shndx;
pltsec->sh_type = SHT_NOBITS;
pltsec->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
pltsec->sh_addralign = L1_CACHE_BYTES;
pltsec->sh_size = (init_plts + 1) * sizeof(struct plt_entry);
mod->arch.init.plt_num_entries = 0;
mod->arch.init.plt_max_entries = init_plts;
if (tramp) {
tramp->sh_type = SHT_NOBITS;
tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
tramp->sh_addralign = __alignof__(struct plt_entry);
tramp->sh_size = sizeof(struct plt_entry);
}
return 0;
}
==============================
Dirty but works just as how I wanted
#include<reg51.h>
#include<string.h>
#include"_LCD_R8C.c"
unsigned char c[12];
unsigned char chr[11];
void serial_int (void) interrupt 4
{
if (RI==1)
{
chr[11] = SBUF;
RI = 0;
TI = 0;
}
}
int main()
{
unsigned char a[2][11]={"$0016221826","$0123456789"};
int i,j;
lcd_init();
lcd_clear();
SCON = 0x50;
TMOD = 0x20;
TH1 = 0xFD;
ET0 = 0;
TR1 = 1;
RI = 1;
ES = 1;
EA = 1;
for(j=0;j<1;j++)
{
for(i=0;i<=10;i++)
{
c[i]=chr[i];
}
c[11]='\0';
}
for(i=0;i<=1;i++)
{
j=strcmp(a[i],c); /* !!! Here is the problem !!! */
if(j==0)
{
lcd_printxy(1,1,"yes");
}
else
{
lcd_printxy(1,6,"no");
}
}
}
I am getting the display as "no", please let me know what is the problem?
the problem might be
1) the received array of characters are not converted to string, or
2) the received array of characters are converted to string but not able to compare with the available string..
please go through the program
One obvious bug for starters - change:
unsigned char a[2][11]={"$0016221826","$0123456789"};
to:
unsigned char a[2][12]={"$0016221826","$0123456789"};
(You need to allow room for the terminating '\0' in each string - I'm surprised your compiler didn't complain about this ?)
Also, this line in your interrupt handler is wrong:
chr[11] = SBUF;
Several problems with this - char only has storage for 11 chars, not 12, and you probably want to be accumulating characters from index 0 and then bumping the index, otherwise you're just overwriting the same character each time.
Looking at the rest of the code there are so many other problems that I think you may need to take a step back here and start with a simpler program - get that working first and then add to it in stages.
You might also want to get a decent introductory book on C and study it as there are lots of very basic mistakes in the code, so you might benefit from a better understanding of the language itself.
You only assign a value to chr[11], the rest of the array is uninitialized and will contain random data. You then copy this array containing random data to c (you could use e.g. memcpy here instead of looping yourself), and finally you compare the complete contents of c (which is random data) with one of the entries in a. So it's kind of natural that the result of that comparison will be that the strings are not equal.
Edit: A redesign of the program in the question
Your program has too many problems to be easily fixed, so I decided to try and rewrite it:
#include <reg51.h>
#include <string.h>
#include "_LCD_R8C.c"
#define INPUT_LENGTH 11
#define ACCEPTABLE_INPUT_COUNT 2
char input[INPUT_LENGTH]; /* The input from the serial port */
int input_pos = 0; /* Current position to write in the input buffer */
int input_done = 0; /* 0 = not done yet, 1 = all input read */
void serial_int (void) interrupt 4
{
if (!input_done && RI == 1)
{
/* Put the input at next position in the input buffer */
/* Then increase the position */
input[input_pos++] = SBUF;
RI = 0;
TI = 0;
/* Check if we have received all input yet */
if (input_pos >= INPUT_LENGTH)
input_done = 1;
}
}
int main()
{
/* Array of data that this programs thinks is acceptable */
/* +1 to the input length, to fit the terminating '\0' character */
char acceptable_inputs[ACCEPTABLE_INPUT_COUNT][INPUT_LENGTH + 1] = {
"$0016221826", "$0123456789"
};
iny acceptable_found = 0; /* Acceptable input found? */
/* Initialization */
lcd_init();
lcd_clear();
SCON = 0x50;
TMOD = 0x20;
TH1 = 0xFD;
ET0 = 0;
TR1 = 1;
RI = 1;
ES = 1;
EA = 1;
/* Wait until we have received all input */
while (!input_done)
; /* Do nothing */
/* Check the received input for something we accept */
for (int i = 0; i < ACCEPTABLE_INPUT_COUNT; i++)
{
if (memcmp(acceptable_inputs[i], input, INPUT_LENGTH) == 0)
{
/* Yes, the data received is acceptable */
acceptable_found = 1;
break; /* Don't have to check any more */
}
}
if (acceptable_found)
lcd_printxy(1, 1, "Yes");
else
lcd_printxy(1, 1, "No");
return 0;
}