How to handle this interrupt-driven state tracking neatly in C? - c
An external module sends the string "CMD\n" to my program one character at a time through interrupts. It is important to know where in the sequence the module is so that I can troubleshoot. This is the way I'm currently handling tracking of the module:
// Enumeration describing the different states of the "CMD\n" receive
// sequence. Each BTSTATE_ENTERING_CMD_x value names the character that is
// expected next; BTSTATE_CMD is reached once the whole sequence has arrived.
typedef enum {
    BTSTATE_ENTERING_CMD_C,   // awaiting "C"
    BTSTATE_ENTERING_CMD_M,   // awaiting "M"
    BTSTATE_ENTERING_CMD_D,   // awaiting "D"
    BTSTATE_ENTERING_CMD_EOL, // awaiting "\n"
    BTSTATE_CMD               // "CMD\n" completely received
} btstate_t;
// State variable. It is written inside the receive ISR, so it is declared
// volatile: without it the compiler may keep a stale copy in a register in
// code that polls the state outside the interrupt handler.
volatile btstate_t btstate = BTSTATE_ENTERING_CMD_C;
// function called every time a new character is sent
// Receive interrupt handler, invoked once per incoming character.
// Advances btstate by exactly one step when the character is the one the
// current state is waiting for; every other combination (including any
// character arriving in BTSTATE_CMD) ends up in the error branch.
ISR(USART_RX_vect) {
    const uint8_t ch = UDR0; // the received character
    uint8_t advanced = 0;    // set once a transition has been taken

    switch (btstate) {
    case BTSTATE_ENTERING_CMD_C:
        if (ch == 'C') {
            btstate = BTSTATE_ENTERING_CMD_M;
            advanced = 1;
        }
        break;
    case BTSTATE_ENTERING_CMD_M:
        if (ch == 'M') {
            btstate = BTSTATE_ENTERING_CMD_D;
            advanced = 1;
        }
        break;
    case BTSTATE_ENTERING_CMD_D:
        if (ch == 'D') {
            btstate = BTSTATE_ENTERING_CMD_EOL;
            advanced = 1;
        }
        break;
    case BTSTATE_ENTERING_CMD_EOL:
        if (ch == '\n') {
            btstate = BTSTATE_CMD;
            advanced = 1;
        }
        break;
    default:
        // BTSTATE_CMD: no transition is defined for this state
        break;
    }

    if (!advanced) {
        // error handling here
    }
}
Intuitively, there seems to be a lot of redundancy in the code. Is there a better or more canonical way to achieve the same result?
What about this? It's pretty readable and easy to modify.
// Enumeration describing the different states of the "CMD\n" receive
// sequence. Each BTSTATE_ENTERING_CMD_x value names the character that is
// expected next; BTSTATE_CMD is reached once the whole sequence has arrived.
typedef enum {
    BTSTATE_ENTERING_CMD_C,   // awaiting "C"
    BTSTATE_ENTERING_CMD_M,   // awaiting "M"
    BTSTATE_ENTERING_CMD_D,   // awaiting "D"
    BTSTATE_ENTERING_CMD_EOL, // awaiting "\n"
    BTSTATE_CMD               // "CMD\n" completely received
} btstate_t;
// State variable. It is written inside the receive ISR, so it is declared
// volatile to stop the compiler from caching it in code that polls the
// state outside the interrupt handler.
volatile btstate_t btstate = BTSTATE_ENTERING_CMD_C;
// One row of the transition table: when the machine is in `state` and the
// character `c` arrives, it moves to `next_state`.
// The typedef keyword is required here: without it, `t_cmp` would be a
// variable of type struct cmp rather than a type name, and declaring an
// array of `t_cmp` would not compile.
typedef struct cmp
{
    btstate_t state;      // state this row applies to
    btstate_t next_state; // state entered when `c` is received
    uint8_t c;            // character expected in `state`
} t_cmp;
ISR(USART_RX_vect) {
static t_cmp cmp_array[] = {
{BTSTATE_ENTERING_CMD_C, BTSTATE_ENTERING_CMD_M, 'C'},
{BTSTATE_ENTERING_CMD_M, BTSTATE_ENTERING_CMD_D, 'M'},
{BTSTATE_ENTERING_CMD_D, BTSTATE_ENTERING_CMD_EOL, 'D'},
{BTSTATE_ENTERING_CMD_EOL, BTSTATE_CMD, '\n'}
};
static int array_size = sizeof(cmp_array) / sizeof(cmp_array[0]);
uint8_t rcv = UDR0; // the received character
int i;
for (i = 0; i < array_size; ++i)
{
if (btstate == cmp_array[i].state && rcv == cmp_array[i].c)
{
btstate = cmp_array[i].next_state;
break ;
}
}
if (i == array_size)
// error handling here
}
Something like the following will test an incoming stream to confirm that it
matches a string:
// The character sequence the incoming stream must start with.
static const char leader[] = "CMD\n";
// Number of characters to match. Derived from the array so it cannot drift
// from the string; the terminating '\0' is excluded.
enum { LEADER_LENGTH = sizeof(leader) - 1 };
// Count of leader characters matched so far. It is written inside the
// receive ISR, so it is volatile to keep code outside the interrupt handler
// from working on a cached copy.
volatile uint8_t btstate = 0;
// Receive interrupt handler, invoked once per incoming character.
// While the leader is incomplete, a matching character advances btstate and
// a mismatching one resets it to 0. Once btstate equals LEADER_LENGTH the
// handler leaves it unchanged.
ISR(USART_RX_vect) {
    const uint8_t rcv = UDR0; // the received character

    if (btstate >= LEADER_LENGTH)
    {
        // leader already matched; nothing more to track here
        return;
    }

    if (rcv == leader[btstate])
    {
        btstate++;
    }
    else
    {
        // error handling here
        btstate = 0;
    }
}
(untested, obviously)
If (btstate == 4) then you've got your leader string and are now receiving
whatever comes after.
An error condition which is not well handled here, which might affect the
design, is where you receive some leading garbage before the correct string.
As things stand, we'll enter into // error handling here and reset btstate,
but if rcv is now equal to the first 'C' that the sender really intended
then we've missed it, and next time around we'll expect 'C' but receive 'M'
and raise yet another error and completely miss the correct string.
Here you have two options. One is to signal the sender to reset itself (which
can be troublesome over high-latency links), and the other is to re-check
(rcv == 'C') in the error handler.
If your command string were "GABBAGABBAHEY", and you were expecting the 'H'
but you instead got 'G', then it's possible that all of the preceding
characters were sent erroneously, or that some number of characters were sent
deliberately as the prefix of another string (or the current string).
Handling that case, and handling the case of there being multiple possible
strings, requires a structure that can take different paths depending on the
character received. In the case where you do want to tolerate leading garbage,
that structure can loop back on itself -- pointing back to the longest prefix
which matches the current state -- and in that case you don't really want to
build the table by hand.
You've said you know what string you're expecting, so I won't go into all that,
but I thought it worth mentioning for completeness.
Related
Waiting for character in string
I am currently working on a project that will be used to test whether an instrument is within tolerance or not. My test equipment will put the DUT (Device Under Test) into a "Test Mode" where it will repeatedly send a string of data every 200ms. I want to receive that data, check is is within tolerance and give it a pass or fail. My code so far (I've edited a few things out like .h files and some work related bits!): void GetData(); void CheckData(); char Data[100]; int deviceId; float a; float b; float c; void ParseString(const char* stringValue) { char* token = NULL; int tokenPlace = 0; token = strtok((char *) stringValue, ","); while (token != NULL) { switch (tokenPlace) { case 0: deviceId = atoi(token); break; case 1: a= ((float)atoi(token)) / 10.0f; break; case 2: b= ((float)atoi(token)) / 100.0f; break; case 3: c= ((float)atoi(token)) / 10.0f; break; } tokenPlace++; token = strtok(NULL, ","); } } void GetData() { int x = UART.scanf("%s,",Data); ParseString(Data); if (x !=0) { UART.printf("Device ID = %i\n\r", deviceId); UART.printf("a= %.1f\n\r", a); UART.printf("s= %.2f\n\r", b); UART.printf("c= %.1f\n\n\r", c); } if (deviceId <= 2) { CheckData(); } else { pc.printf("Device ID not recognised\n\n\r"); } } void CheckData() { if (a >= 49.9f && a< = 50.1f) { pc.printf("a Pass\n\r"); } else { pc.printf("a Fail\n\r"); } if (b >= 2.08f && b <= 2.12f) { pc.printf("b Pass\n\r"); } else { pc.printf("b Fail\n\r"); } if (c >= 20.0f && c <= 25.0f) { pc.printf("c Pass\n\n\r"); } else { pc.printf("c Fail\n\n\r"); } if (deviceId == 0) { (routine1); } else if (deviceId == 1) { (routine2); } else if (deviceId == 2) { (Routine3); } } int main() { while(1) { if(START == 0) { wait(0.1); GetData(); } } } And this works absolutely fine. I am only printing the results to a serial terminal so I can check the data is correct to make sure it is passing and failing correctly. 
My issue is every now and then the START button happens to be pressed during the time the string is sent and the data can be corrupted, so the deviceId fails and it will say not recognised. This means I then have to press the start button again and have another go. At the moment, it's a rare occurrence but I'd like to get rid of it if possible. I have tried adding a special character at the beginning of the string but this again gets missed sometimes. Ideally, when the start button is pressed, I would like it to wait for this special character so it knows it is at the beginning of the string, then the data would be read correctly, but I am unsure how to go about it. I have been unsuccessful in my attempts so far but I have a feeling I am overthinking it and there is a nice easy way to do it. Probably been staring at it too long now! My microcontroller is STM32F103RB and I am using the STM Nucleo with the mBed IDE as it's easy and convenient to test the code while I work on it.
You can use ParseString to return a status indicating whether a complete string is read or not. int ParseString(const char* stringValue) { /* ... your original code ... */ /* String is complete if 4 tokens are read */ return (tokenPlace == 4); } Then in GetData use the ParseString return value to determine whether to skip the string or not. void GetData() { int x = UART.scanf("%s,",Data); int result = ParseString(Data); if (!result) { /* Did not get complete string - just skip processing */ return; } /* ... the rest of your original code ... */ }
receive/transmit over rs232 with arm lpc2148 on sparkfun logomatic
I am trying to program the logomatic by sparkfun, and yes I have used their forum with no responses, and having some issues. I am trying to send characters to the UART0 and I want the logomatic to respond with specific characters and not just an echo. For example, I send 'ID?' over the terminal (using RealTerm), and the logomatic sends back '1'. All it will so now is echo. I am using c with programmers notepad with the WinARM toolchain. The following snippet is from the main.c file. I only included this, because I am fairly certain that this is where my problem lies void Initialize(void) { rprintf_devopen(putc_serial0); PINSEL0 = 0xCF351505; PINSEL1 = 0x15441801; IODIR0 |= 0x00000884; IOSET0 = 0x00000080; S0SPCR = 0x08; // SPI clk to be pclk/8 S0SPCR = 0x30; // master, msb, first clk edge, active high, no ints } Notice the rprintf_devopen function, below is from the rprintf.c file, and due to my mediocre skills, I do not understand this bit of code. If I comment out the rprintf_devopen in main, the chip never initializes correctly. static int (*putcharfunc)(int c); void rprintf_devopen( int(*put)(int) ) { putcharfunc = put; } static void myputchar(unsigned char c) { if(c == '\n') putcharfunc('\r'); putcharfunc(c); } Now, below is from the serial.c file. So my thought was that I should be able to just call one of these putchar functions in main.c and that it would work, but it still just echoes. 
int putchar_serial0 (int ch) { if (ch == '\n') { while (!(U0LSR & 0x20)); U0THR = CR; // output CR } while (!(U0LSR & 0x20)); return (U0THR = ch); } // Write character to Serial Port 0 without \n -> \r\n int putc_serial0 (int ch) { while (!(U0LSR & 0x20)); return (U0THR = ch); } // Write character to Serial Port 1 without \n -> \r\n int putc_serial1 (int ch) { while (!(U1LSR & 0x20)); return (U1THR = ch); } void putstring_serial0 (const char *string) { char ch; while ((ch = *string)) { putchar_serial0(ch); string++; } } I have tried calling the different putchar functions in main, also with the rprintf_devopen. Still just echoes. I have altered the putchar functions and still just echoes. I have tried just writing to the U0THR register in main.c and no luck. Keep in mind that I am still a student and my major is electrical engineering, so the only programming classes that I have taken are intro to c, and an intro to vhdl. I am more of a math and physics guy. I was working on this for an internship I was doing. The internship ended, but it just bugs me that I cannot figure this out. Honestly, working on this program taught me more that the c class that I took. Anyways, I appreciate any help that can be offered, and let me know if you want to see the entire code. Below is an update to the question. This function is in main.c static void UART0ISR(void) { char temp; trig = 13; //This is where you set the trigger character in decimal, in this case a carriage return. temp = U0RBR; //U0RBR is the receive buffer on the chip, refer to datasheet. if(temp == query1[counter1]) //This segment looks for the characters "ID?" 
from the U0RBR { //query1 is defined at the top of the program counter1++; if(counter1 >= 3) { flag1 = 1; //This keeps track of whether or not query1 was found counter1 = 0; stat(1,ON); delay_ms(50); stat(1,OFF); RX_in = 0; temp = 0; //rprintf("\n\rtransmission works\n"); putc_serial1(49); } } if(temp == query2[counter2] && flag1 == 1) //This segment looks for "protov?" from the U0RBR, but only after query1 has been found { counter2++; if(counter2 >= 7) { flag2 = 1; //This keeps track of whether or not query2 was found counter2 = 0; stat(1,ON); delay_ms(50); stat(1,OFF); RX_in = 0; temp = 0; putc_serial1(49); } } if(temp == stop[counter3]) //This if segment looks for certain characters in the receive buffer to stop logging { counter3++; if(counter3 >= 2) { flagstop = 1; //This flagstop keeps track of whether or not stop was found. When the stop characters are found, flag1 = 0; //the query1 and query2 flags will be reset. So, in order to log again these queries must be sent again flag2 = 0; //this may seem obvious, but deserves mention. counter3 = 0; stat(1,ON); delay_ms(500); stat(1,OFF); RX_in = 0; temp = 0; } flagstop = 0; //Reset the stop flag in order to wait once again for the query 1&2 } if(RX_in == 0) { memset (RX_array1, 0, 512); // This clears the RX_array to make way for new data memset (RX_array2, 0, 512); } if(RX_in < 512 && flag1 == 1 && flag2 == 1) //We cannot log data until we see both flags 1 & 2 and after we see these flags, { //we must then see the trigger character "carriage return" RX_array1[RX_in] = temp; RX_in++; if(temp == trig) { RX_array1[RX_in] = 10; // delimiters log_array1 = 1; RX_in = 0; } } else if(RX_in >= 512 && flag1 == 1 && flag2 == 1) //This else if is here in case the RX_in is greater than 512 because the RX_arrays are defined to { //be of size 512. If this happens we don't want to lose data, so we must put the overflow into another register. 
RX_array2[RX_in - 512] = temp; RX_in++; RX_array1[512] = 10; // delimiters RX_array1[512 + 1] = 13; log_array1 = 1; if(RX_in == 1024 || temp == trig) { RX_array2[RX_in - 512] = 10; // delimiters log_array2 = 1; RX_in = 0; } } temp = U0IIR; // have to read this to clear the interrupt VICVectAddr = 0; }
The execution of the code always goes into the else statement
Some very strange things happen in my source code. The following function works well and it prints 'y' when the password is correct and prints 'n' when it is incorrect. But if i add some UART1_Write and Delay functions to the else statement the bug comes out and even if the password is "zxc" (correct) it ALWAYS enters the else statement. I'm using MikroC PRO for PIC v6.0.0, the robot system is made of PIC18F452 and RN-42 bluetooth module connected to it. I am testing with a laptop with a bluetooth and TeraTerm. For more info: http://instagram.com/p/pLnU9eDL8z/# Here it is the well working routine: void authenticate() { char *input = ""; char *password = "zxc\0"; unsigned char ready = 0; while (connected && !ready) { if (UART1_Data_Ready()) { UART1_Read_Text(input, "|", 17); strcat(input, "\0"); if (strcmp(input, password) == 0) { UART1_Write('y'); ready = 1; } else { UART1_Write('n'); ready = 1; } } } } This version of the routine ALWAYS goes in the ELSE statement of the strcmp(input, password) == 0 part: void authenticate() { char *input = ""; char *password = "zxc\0"; unsigned char ready = 0; while (connected && !ready) { if (UART1_Data_Ready()) { UART1_Read_Text(input, "|", 17); strcat(input, "\0"); if (strcmp(input, password) == 0) { UART1_Write('y'); ready = 1; } else { UART1_Write('n'); Delay_ms(100); UART1_Write('$'); Delay_ms(100); UART1_Write('$'); Delay_ms(100); UART1_Write('$'); Delay_ms(100); UART1_Write('K'); Delay_ms(100); UART1_Write(','); Delay_ms(100); UART1_Write('-'); Delay_ms(100); UART1_Write('-'); Delay_ms(100); UART1_Write('-'); Delay_ms(100); UART1_Write('\n'); ready = 1; } } } } It is important to send all these addition symbols in order to get RN-42 into command mode and disconnect the user if the password is wrong. Please help me solve the problem. Any ideas appreciated!
As others have pointed out in the comments section a major issue with your code is that you are trying to store the UART data to memory that does not belong to you. When you declare char *input = "";, you haven't actually allocated any space except for a single byte that stores '\0'. Then, when you use UART1_Read_Text(), you tell that function you may have up to 17 characters that will be read before finding the delimiter - all of which should be stored at the location pointed to by input. The description of that library function can be found here. Also, based on the library description it looks like UART1_Read_Text() already adds the null-termination to the UART data. I base this assumption off the description of UARTx_Write_Text and the example that they provide on their website. However, I would recommend that you verify that is indeed the case. Also, your initialization of password is redundant and char *password = "zxc\0" should be changed to char *password = "zxc". When you declare a string literal using double quotation marks it is automatically null-terminated. This excerpt is from "C in a Nutshell": A string literal consists of a sequence of characters (and/or escape sequences) enclosed in double quotation marks... A string literal is a static array of char that contains character codes followed by a string terminator, the null character \0... The empty string "" occupies exactly one byte in memory, which holds the terminating null character. Based on the above, I would go about it a little more like this: #define MAX_NUM_UART_RX_CHARACTERS 17 void authenticate() { char input[MAX_NUM_UART_RX_CHARACTERS + 1]; char *password = "zxc"; unsigned char ready = 0; while (connected && !ready) { if (UART1_Data_Ready()) { UART1_Read_Text(input, "|", MAX_NUM_UART_RX_CHARACTERS); if (strcmp(input, password) == 0) { UART1_Write('y'); ready = 1; } else { UART1_Write('n'); ready = 1; } } } }
Parsing code for GPS NMEA string
i am trying to parse the incoming GPGGA NMEA GPS string using Arduino uno and below code. What i am trying to do is that i am using only GPGGA NMEA string to get the values of Latitude, longitude and altitude.In my below code, i had put certain checks to check if incoming string is GPGGA or not, and then store the further string in a array which can be further parsed suing strtok function and all the 3 GPS coordinates can be easily find out. But i am unable to figure out how to store only GPGGA string and not the further string.I am using a for loop but it isn't working. I am not trying to use any library.I had came across certain existing codes like this. Here is the GPGGA string information link i am trying to have following functionlity i) Check if incoming string is GPGGA ii) If yes, then store the following string upto EOL or upto * (followed by checksum for the array) in a array, array length is variable(i am unable to find out solution for this) iii) Then parse the stored array(this is done, i tried this with a different array) #include <SoftwareSerial.h> SoftwareSerial mySerial(10,11); // 10 RX / 11 TX void setup() { Serial.begin(9600); mySerial.begin(9600); } void loop() { uint8_t x; char gpsdata[65]; if((mySerial.available())) { char c = mySerial.read(); if(c == '$') {char c1 = mySerial.read(); if(c1 == 'G') {char c2 = mySerial.read(); if(c2 == 'P') {char c3 = mySerial.read(); if(c3 == 'G') {char c4 = mySerial.read(); if(c4 == 'G') {char c5 = mySerial.read(); if(c5 == 'A') {for(x=0;x<65;x++) { gpsdata[x]=mySerial.read(); while (gpsdata[x] == '\r' || gpsdata[x] == '\n') { break; } } } else{ Serial.println("Not a GPGGA string"); } } } } } } } Serial.println(gpsdata); } Edit 1: Considering Joachim Pileborg, editing the for loop in the code. I am adding a pic to show the undefined output of the code. 
Input for the code: $GPGGA,092750.000,5321.6802,N,00630.3372,W,1,8,1.03,61.7,M,55.2,M,,*76 $GPGSA,A,3,10,07,05,02,29,04,08,13,,,,,1.72,1.03,1.38*0A $GPGSV,3,1,11,10,63,137,17,07,61,098,15,05,59,290,20,08,54,157,30*70 $GPGSV,3,2,11,02,39,223,19,13,28,070,17,26,23,252,,04,14,186,14*79 $GPGSV,3,3,11,29,09,301,24,16,09,020,,36,,,*76 $GPRMC,092750.000,A,5321.6802,N,00630.3372,W,0.02,31.66,280511,,,A*43 $GPGGA,092751.000,5321.6802,N,00630.3371,W,1,8,1.03,61.7,M,55.3,M,,*75 $GPGSA,A,3,10,07,05,02,29,04,08,13,,,,,1.72,1.03,1.38*0A $GPGSV,3,1,11,10,63,137,17,07,61,098,15,05,59,290,20,08,54,157,30*70 $GPGSV,3,2,11,02,39,223,16,13,28,070,17,26,23,252,,04,14,186,15*77 $GPGSV,3,3,11,29,09,301,24,16,09,020,,36,,,*76 $GPRMC,092751.000,A,5321.6802,N,00630.3371,W,0.06,31.66,280511,,,A*45
After a quick check of the linked article on the NMEA 0183 protocol, this jumped out at me: <CR><LF> ends the message. This means, that instead of just read indiscriminately from the serial port, you should be looking for that sequence. If found, you should terminate the string, and break out of the loop. Also, you might want to zero-initialize the data string to begin with, to easily see if there actually is any data in it to print (using e.g. strlen).
You could use some functions from the C library libnmea. There are functions to split a sentence into values by comma and then parse them.
Offering this as a suggestion in support of what you are doing... Would it not be useful to replace all of the nested if()s in your loop with something like: EDIT added global string to copy myString into once captured char globalString[100];//declare a global sufficiently large to hold you results void loop() { int chars = mySerial.available(); int i; char *myString; if (chars>0) { myString = calloc(chars+1, sizeof(char)); for(i=0;i<chars;i++) { myString[i] = mySerial.read(); //test for EOF if((myString[i] == '\n') ||(myString[i] == '\r')) { //pick this... myString[i]=0;//strip carriage - return line feed(or skip) //OR pick this... (one or the other. i.e.,I do not know the requirements for your string) if(i<chars) { myString[i+1] = mySerial.read() //get remaining '\r' or '\n' myString[i+2]=0;//add null term if necessary } break; } } if(strstr(myString, "GPGGA") == NULL) { Serial.println("Not a GPGGA string"); //EDIT strcpy(globalString, "");//if failed, do not want globalString populated } else { //EDIT strcpy(globalString, myString); } } //free(myString) //somewhere when you are done with it } Now, the return value from mySerial.available() tells you exactly how many bytes to read, you can read the entire buffer, and test for validity all in one.
I have a project that will need to pull the same information out of the same sentence. I got this out of a log file import serial import time ser = serial.Serial(1) ser.read(1) read_val = ("nothing") gpsfile="gpscord.dat" l=0 megabuffer='' def buffThis(s): global megabuffer megabuffer +=s def buffLines(): global megabuffer megalist=megabuffer.splitlines() megabuffer=megalist.pop() return megalist def readcom(): ser.write("ati") time.sleep(3) read_val = ser.read(size=500) lines=read_val.split('\n') for l in lines: if l.startswith("$GPGGA"): if l[:len(l)-3].endswith("*"): outfile=open('gps.dat','w') outfile.write(l.rstrip()) outfile.close() readcom() while 1==1: readcom() answer=raw_input('not looping , CTRL+C to abort') The result is this: gps.dat $GPGGA,225714.656,5021.0474,N,00412.4420,W,0,00,50.0,0.0,M,18.0,M,0.0,0000*5B
Using "malloc" every single time you read a string is an enormous amount of computational overhead. (And I didn't see the corresponding free() function call. Without that, you never get that memory back until program termination or the system runs out of memory.) Just pick the size of the longest string you will ever need, add 10 to it, and declare that as your string array size. Set once and done. There are several C functions for getting substrings out of a string; strtok() using the comma is probably the least overhead. You are on an embedded microcontroller. Keep it small, keep overhead down. :)
#include <stdio.h> #include <string.h> #define GNSS_HEADER_LENGTH 5 #define GNSS_PACKET_START '$' #define GNSS_TOKEN_SEPARATOR ',' #define bool int #define FALSE 0 #define TRUE 1 //To trim a string contains \r\n void str_trim(char *str){ while(*str){ if(*str == '\r' || *str == '\n'){ *str = '\0'; } str++; } } /** * To parse GNSS data by header and the index separated by comma * * $GPGSV,1,1,03,23,39,328,30,18,39,008,27,15,33,035,33,1*5A * $GNRMC,170412.000,V,,,,,,,240322,,,N,V*2D * $GNGGA,170412.000,,,,,0,0,,,M,,M,,*57 * * #data_ptr the pointer points to gps data * #header the header for parsing GPGSV * #repeat_index the header may repeat for many lines * so the header index is for identifying repeated header * #token_index is the index of the parsing data separated by "," * the start is 1 * #result to store the result of the parser input * * #result bool - parsed successfully **/ bool parse_gnss_token(char *data_ptr, char *header, int repeat_index, int token_index, char *result) { bool gnss_parsed_result = FALSE; // To check GNSS data parsing is success bool on_header = FALSE; // For header int header_repeat_counter = 0; int header_char_index = 0; // each char in header index // For counting comma int counted_token_index = 0; // To hold the result character index bool data_found = FALSE; char *result_start = result; char header_found[10]; while (*data_ptr) { // 1. Packet start if (*data_ptr == GNSS_PACKET_START) { on_header = TRUE; header_char_index = 0; // to index each character in header data_found = FALSE; // is data part found data_ptr++; } // 2. 
For header parsing if (on_header) { if (*data_ptr == GNSS_TOKEN_SEPARATOR || header_char_index >= GNSS_HEADER_LENGTH) { on_header = FALSE; } else { header_found[header_char_index] = *data_ptr; if (header_char_index == GNSS_HEADER_LENGTH - 1) { // Now Header found header_found[header_char_index + 1] = '\0'; on_header = FALSE; if (!strcmp(header, header_found)) { // Some headers may repeat - to identify it set the repeat index if (header_repeat_counter == repeat_index) { //printf("Header: %s\r\n", header_found ); data_found = TRUE; } header_repeat_counter++; } } header_char_index++; } } // 3. data found if (data_found) { // To get the index data separated by comma if (counted_token_index == token_index && *data_ptr != GNSS_TOKEN_SEPARATOR) { // the data to parse *result++ = *data_ptr; gnss_parsed_result = TRUE; } if (*data_ptr == GNSS_TOKEN_SEPARATOR) { // if , counted_token_index++; // The comma counter for index } // Break if the counted_token_index(token_counter) greater than token_index(search_token) if (counted_token_index > token_index) { break; } } // Appending \0 to the end *result = '\0'; // To trim the data if ends with \r or \n str_trim(result_start); // Input data data_ptr++; } return gnss_parsed_result; } int main() { char res[100]; char *nem = "\ $GNRMC,080817.000,A,0852.089246,N,07636.289920,E,0.00,139.61,270322,,,A,V*04\r\n\\r\n\ $GNGGA,080817.000,0852.089246,N,07636.289920,E,1,5,1.41,11.246,M,-93.835,M,,*5E\r\n\ $GNVTG,139.61,T,,M,0.00,N,0.00,K,A*2F\r\n\ $GNGSA,A,3,30,19,17,14,13,,,,,,,,1.72,1.41,0.98,1*0A\r\n\ $GNGSA,A,3,,,,,,,,,,,,,1.72,1.41,0.98,3*02\r\n\ $GNGSA,A,3,,,,,,,,,,,,,1.72,1.41,0.98,6*07\r\n\ $GPGSV,3,1,12,06,64,177,,30,60,138,15,19,51,322,18,17,42,356,27,1*68\r\n\ $GPGSV,3,2,12,14,36,033,17,07,34,142,17,13,32,267,17,02,21,208,,1*6C\r\n\ $GPGSV,3,3,12,15,05,286,,01,05,037,,03,03,083,,20,02,208,,1*6B\r\n\ $GAGSV,1,1,00,7*73\r\n\ $GIGSV,1,1,00,1*7D\r\n\ $GNGLL,0852.089246,N,07636.289920,E,080817.000,A,A*43\r\n\ 
$PQTMANTENNASTATUS,1,0,1*4F\r\n"; printf("Parsing GNRMC\r\n"); printf("===============\r\n"); for(int i=1;i<=16;i++){ parse_gnss_token(nem, "GNRMC", 0, i, res); printf("Index: %d, Result: %s\r\n", i, res); } printf("Parsing GNVTG (First Parameter)\r\n"); printf("================================"); // GNVTG - Header, 0 - Repeat Index(if header is repeating), 1 - Value Index, parse_gnss_token(nem, "GNVTG", 0, 1, res); printf("\r\nGNVTG: %s\r\n", res); return 0; }
UTF-8 to unicode converter for embedded system display
I have an embedded system that gets UTF-8 encoded data to display via UPNP. The display device has the ability to display characters. I need a way to convert the UTF-8 data I receive via UPNP to Unicode. The display is on a PIC, and it is sent data via a UPNP bridge running linux. Is there a simple way to do the conversion before I send it to the display board in linux?
If you have a real operating system and hosted C environment at your disposal, the best approach would be to simply ensure that your program runs in a locale that uses UTF-8 as its encoding and use mbrtowc or mbtowc to convert UTF-8 sequences to Unicode codepoint values (wchar_t is a Unicode codepoint number on Linux and any C implementation that defines __STDC_ISO_10646__). If you do want to skip the system library routines and do UTF-8 decoding yourself, be careful. I once did a casual survey using Google code search and found that somewhere between 1/3 and 2/3 of the UTF-8 code out in the wild was dangerously wrong. Here is a fully correct, fast, and simple implementation I would highly recommend: http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ My implementation in musl is somewhat smaller in binary size and seems to be faster, but it's also a bit harder to understand.
To convert an array of bytes encoded as UFT-8 into an array of Unicode code points: The trick is to detect various encoding mistakes. #include <limits.h> #include <stdio.h> #include <stdbool.h> #include <stdint.h> typedef struct { uint32_t UnicodePoint; // Accumulated code point uint32_t Min; // Minimum acceptable codepoint int i; // Index of char/wchar_t remaining bool e; // Error flag } UTF_T; static bool IsSurrogate(unsigned c) { return (c >= 0xD800) && (c <= 0xDFFF); } // Return true if more bytes needed to complete codepoint static bool Put8(UTF_T *U, unsigned ch) { ch &= 0xFF; if (U->i == 0) { if (ch <= 0x7F) { U->UnicodePoint = ch; return false; /* No more needed */ } else if (ch <= 0xBF) { goto fail; } else if (ch <= 0xDF) { U->Min = 0x80; U->UnicodePoint = ch & 0x1F; U->i = 1; } else if (ch <= 0xEF) { U->Min = 0x800; U->UnicodePoint = ch & 0x0F; U->i = 2; } else if (ch <= 0xF7) { U->Min = 0x10000; U->UnicodePoint = ch & 0x07; U->i = 3; } else { goto fail; } return true; /* More needed */ } // If expected continuation character missing ... if ((ch & (~0x3F)) != 0x80) { goto fail; } U->UnicodePoint <<= 6; U->UnicodePoint |= (ch & 0x3F); // If last continuation character ... if (--(U->i) == 0) { // If codepoint out of range ... if ((U->UnicodePoint < U->Min) || (U->UnicodePoint > 0x10FFFF) || IsSurrogate(U->UnicodePoint)) { goto fail; } return false /* No more needed */; } return true; /* More needed */ fail: U->UnicodePoint = -1; U->i = 0; U->e = true; return false /* No more needed */; } /* return 0:OK, else error */ bool ConvertUTF8toUnicodeCodepoints(const char *UTF8, size_t Length, uint32_t *CodePoints, size_t *OutLen) { UTF_T U = { 0 }; *OutLen = 0; for (size_t i = 0; i < Length;) { while (Put8(&U, UTF8[i++])) { // Needed bytes not available? if (i >= Length) { return true; } } if (U.e) break; CodePoints[(*OutLen)++] = U.UnicodePoint; } return U.e; } This is based on some old code, please advise as it may not be up to current standards. 
Not the prettiest with goto and magic numbers. What is nice about this approach is rather than CodePoints[(*OutLen)++] = U.UnicodePoint for consuming the codepoint, if one wanted to extract UTF16 (BE or LE), one could easily write consumer code for the UTF_T block and not need to change to the UTF8 -> codepoint part.
I would use the Unicode manipulation functions of GLib, a LGPL-licensed utility library. It sounds like g_utf8_to_ucs4() is what you are looking for.