Regular expression needs an extra whitespace at the end - c

Consider this code:
#include <pcre.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(int argc, char **argv) {
char **ret = NULL, **t;
char *buffer;
pcre *re;
const char *error;
int erroffset, rc = 1, arraylength = 0, ovector[2], i = 0;
const char *string = "WORD";
buffer = malloc(strlen(string)+1);
re = pcre_compile("[A-Za-z0-9]+|\\\"[A-Za-z0-9\\s\\.'\\?]+\\\"", PCRE_MULTILINE, &error, &erroffset, NULL);
if (re == NULL ) printf ("pcre_compile error: %s\n", error);
while (rc > 0) {
rc = pcre_exec(re, NULL, string, strlen(string), i, 0, ovector, 2);
bzero(buffer, strlen(string));
pcre_copy_substring(string, ovector, rc, 0, buffer, strlen(string));
if (rc > 0) {
printf("BUFFER: %s\n", buffer);
}
i = ovector[1];
}
return 0;
}
The output of this is:
[bart#localhost tests]$ ./pcre
BUFFER:
I expect to get WORD. When I added a whitespace character after WORD:
const char *string = "WORD ";
it does work:
[bart#localhost tests]$ ./pcre
BUFFER: WORD
Also, when I add more words, it works.
I tried to test my regular expression here, and it tells me it should work without the whitespace character.
What am I missing here?
Update When I change my regex to [A-Za-z0-9]+ it still doesn't work without the whitespace.

The problem was in pcre_copy_substring, the buffer was too small. This:
pcre_copy_substring(string, ovector, rc, 0, buffer, strlen(string)+1);
fixed it.

Related

Replace all matches in pcre2_substitute in C

I replace the first occurrence of the match with pcre2_substitute,
#define PCRE2_CODE_UNIT_WIDTH 8
#include <stdio.h>
#include <string.h>
#include <pcre2.h>
int main(int argc, char **argv)
{
PCRE2_SPTR pattern = "(\\d+)";
PCRE2_SPTR subject = "1 something 849 for 993";
PCRE2_SPTR replacement = "XXX";
pcre2_code *re;
int errornumber;
int i;
int rc;
PCRE2_SIZE erroroffset;
PCRE2_SIZE *ovector;
size_t subject_length;
size_t replacement_length = strlen((char *)replacement);
pcre2_match_data *match_data;
subject_length = strlen((char *)subject);
PCRE2_UCHAR output[1024] = "";
PCRE2_SIZE outlen = sizeof(output) / sizeof(PCRE2_UCHAR);
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &errornumber, &erroroffset, NULL);
if (re == NULL)
{
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, buffer);
}
match_data = pcre2_match_data_create_from_pattern(re, NULL);
rc = pcre2_substitute(re, subject, subject_length, 0, 0, match_data, NULL, replacement,
replacement_length, output, &outlen);
printf("Output: %s", output);
return 0;
}
I know that I should repeat pcre2_substitute in a loop for replacing the next match, but I am not sure about the safest way to feed output as the subject of the next step.
You can replace all with a single call to pcre2_substitute using extended option PCRE2_SUBSTITUTE_GLOBAL:
rc = pcre2_substitute(re, subject, subject_length, 0,
PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED,
match_data, NULL, replacement, replacement_length, output, &outlen);

EVP_DecryptUpdate is giving segmentation fault

In c while using the openssl/evp.h library.
If I am doing
EVP_CIPHER_CTX_new();
EVP_DecryptInit(ctx, EVP_aes_256_wrap_pad(), NULL, key, iv);
EVP_DecryptUpdate(ctx, buf, &cipher_len, 32);
I am getting a seg fault in the last line but when I change the EVP_aes_256_wrap_pad() to EVP_aes_128_ecb() and keeping all other parameters same seg fault is gone.
Here is an example code
Working
#include <openssl/evp.h>
#include <stdio.h>
#include <string.h>
void dump_head(unsigned char *buf, size_t len)
{
unsigned end, i;
for (end = len; end > 0; end--)
if (buf[end-1] != 0)
break;
printf("buf = {");
for (i = 0; i < end; i++)
printf(" %02hhx,", buf[i]);
printf(" }\n");
}
int main(void)
{
unsigned char key[] = "0123456789abcdef";
unsigned char iv[] = "1234567887654321";
unsigned char indata[32] = "0123456789abcdeffedcba9876543210";
unsigned char buf[4096];
unsigned pos;
int cipher_len;
EVP_CIPHER_CTX *ctx;
ctx = EVP_CIPHER_CTX_new();
EVP_DecryptInit_ex(ctx, EVP_aes_128_ecb(), NULL, key, iv);
EVP_DecryptUpdate(ctx, buf, &cipher_len, indata, 32);
printf("Got %d\n", cipher_len);
dump_head(buf, sizeof(buf));
printf("Final!\n");
memset(buf, 0, sizeof(buf));
EVP_DecryptFinal_ex(ctx, buf, &cipher_len);
printf("Got %d\n", cipher_len);
dump_head(buf, sizeof(buf));
return 0;
}
Not working
#include <openssl/evp.h>
#include <stdio.h>
#include <string.h>
void dump_head(unsigned char *buf, size_t len)
{
unsigned end, i;
for (end = len; end > 0; end--)
if (buf[end-1] != 0)
break;
printf("buf = {");
for (i = 0; i < end; i++)
printf(" %02hhx,", buf[i]);
printf(" }\n");
}
int main(void)
{
unsigned char key[] = "0123456789abcdef";
unsigned char iv[] = "1234567887654321";
unsigned char indata[32] = "0123456789abcdeffedcba9876543210";
unsigned char buf[4096];
unsigned pos;
int cipher_len;
EVP_CIPHER_CTX *ctx;
ctx = EVP_CIPHER_CTX_new();
EVP_DecryptInit_ex(ctx, EVP_aes_128_wrap_pad(), NULL, key, iv);
EVP_DecryptUpdate(ctx, buf, &cipher_len, indata, 32);
printf("Got %d\n", cipher_len);
dump_head(buf, sizeof(buf));
printf("Final!\n");
memset(buf, 0, sizeof(buf));
EVP_DecryptFinal_ex(ctx, buf, &cipher_len);
printf("Got %d\n", cipher_len);
dump_head(buf, sizeof(buf));
return 0;
}
Can you please suggest is there anything more that I have to add for EVP_aes_256_wrap_pad().
It's not documented that I can find, and may not actually be supported, but to use a wrap-mode cipher in EVP_{Cipher,Encrypt,Decrypt}* you must first set a flag on the context:
EVP_CIPHER_CTX_set_flags(ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW);
and then use Init_ex as you did.
Since you didn't set this, your call to DecryptInit_ex failed, but you ignored the return code, and then called DecryptUpdate on an uninitialized context, which crashes. You should always check the return code from any OpenSSL routine that returns one.
However even when I set this flag, DecryptUpdate returns 0, I suspect because your data is not valid ciphertext for this algorithm (the keywrap algorithms include an integrity check), although I would expect it to put something in the errorstack and it doesn't. I will look more deeply at that if I have time.

posix regular expression for parsing uevent causing error

I am trying to parse uevent using this below code but I think my regular expression is not proper causing regcomp function to fail.
Can anyone help? I am trying to do something like this.
#include <stdio.h>
#include <string.h>
#include <regex.h>
int main ()
{
char * source = "change#/devices/soc/799999.i2c/i2c-3/3-0015/power_supply/battery";
char * regexString = "(?<action>[a-zA-Z]+)#\\/(?<dev_path>.*)\\/(?<subsystem>[a-zA-z]+)\\/(?<name>[a-zA-z]+)";
size_t maxGroups = 4;
regex_t regexCompiled;
regmatch_t groupArray[maxGroups];
if (regcomp(&regexCompiled, regexString, REG_EXTENDED))
{
printf("Could not compile regular expression.\n");
return 1;
};
regfree(&regexCompiled);
return 0;
}
I am getting "Could not compile regular expression.". It means regcomp didn't recognize the regex.
When I report on the error using the code:
#include <stdio.h>
#include <string.h>
#include <regex.h>
int main(void)
{
//char * source = "change#/devices/soc/799999.i2c/i2c-3/3-0015/power_supply/battery";
char * regexString = "(?<action>[a-zA-Z]+)#\\/(?<dev_path>.*)\\/(?<subsystem>[a-zA-z]+)\\/(?<name>[a-zA-z]+)";
//size_t maxGroups = 4;
regex_t regexCompiled;
//regmatch_t groupArray[maxGroups];
int rc;
if ((rc = regcomp(&regexCompiled, regexString, REG_EXTENDED)) != 0)
{
char buffer[1024];
regerror(rc, &regexCompiled, buffer, sizeof(buffer));
printf("Could not compile regular expression (%d: %s).\n", rc, buffer);
return 1;
}
regfree(&regexCompiled);
return 0;
}
I get the output:
Could not compile regular expression (13: repetition-operator operand invalid).
The problem is in the notation (? you are using:
"(?<action>[a-zA-Z]+)#\\/(?<dev_path>.*)\\/(?<subsystem>[a-zA-z]+)\\/(?<name>[a-zA-z]+)"
That notation is for PCRE and not POSIX. And PCRE uses ? after ( precisely because it isn't valid in other regex systems (such as POSIX).
So, if you want to use PCRE regexes, install and use the PCRE library.
Otherwise, you'll need to use:
"([a-zA-Z]+)#\\/(.*)\\/([a-zA-z]+)\\/([a-zA-z]+)"
With that in place, and noting that you need a regmatch_t for the whole of the string that's matched plus 4 captured groups (for a total of 5 captures), you can write:
#include <stdio.h>
#include <string.h>
#include <regex.h>
int main(void)
{
char *source = "change#/devices/soc/799999.i2c/i2c-3/3-0015/power_supply/battery";
// char * regexString = "(?<action>[a-zA-Z]+)#\\/(?<dev_path>.*)\\/(?<subsystem>[a-zA-z]+)\\/(?<name>[a-zA-z]+)";
size_t maxGroups = 5;
char *regexString = "([a-zA-Z]+)#\\/(.*)\\/([a-zA-z]+)\\/([a-zA-z]+)";
regex_t regexCompiled;
regmatch_t groupArray[maxGroups];
int rc;
if ((rc = regcomp(&regexCompiled, regexString, REG_EXTENDED)) != 0)
{
char buffer[1024];
regerror(rc, &regexCompiled, buffer, sizeof(buffer));
printf("Could not compile regular expression (%d: %s).\n", rc, buffer);
return 1;
}
if ((rc = regexec(&regexCompiled, source, maxGroups, groupArray, 0)) != 0)
{
char buffer[1024];
regerror(rc, &regexCompiled, buffer, sizeof(buffer));
printf("Could not execute regular expression (%d: %s).\n", rc, buffer);
return 1;
}
printf("Match successful:\n");
for (size_t i = 0; i < maxGroups; i++)
{
int so = groupArray[i].rm_so;
int eo = groupArray[i].rm_eo;
printf("%zu: %d..%d [%.*s]\n", i, so, eo, eo - so, &source[so]);
}
regfree(&regexCompiled);
return 0;
}
and the output is:
Match successful:
0: 0..64 [change#/devices/soc/799999.i2c/i2c-3/3-0015/power_supply/battery]
1: 0..6 [change]
2: 8..43 [devices/soc/799999.i2c/i2c-3/3-0015]
3: 44..56 [power_supply]
4: 57..64 [battery]

pcre C API only return first match

#include <stdio.h>
#include <string.h>
#include <pcre.h>
#define OVECCOUNT 30
#define SRCBUFFER 1024*1024
int main(int argc, char **argv){
pcre *re;
const char *error;
int erroffset;
int ovector[OVECCOUNT];
int rc, i;
if (argc != 2){
fprintf(stderr, "Usage : %s PATTERN\n", argv[0]);
return 1;
}
char *src=malloc(SRCBUFFER);
int srclen = fread(src, sizeof(char), SRCBUFFER, stdin);
re = pcre_compile(argv[1], 0, &error, &erroffset, NULL);
if (re == NULL){
fprintf(stderr, "PCRE compilation failed at offset %d: %s\n", erroffset, error);
return 1;
}
rc = pcre_exec(re, NULL, src, srclen, 0, 0, ovector, OVECCOUNT);
if (rc < 0){
if (rc == PCRE_ERROR_NOMATCH) fprintf(stderr, "Sorry, no match...\n");
else fprintf(stderr, "Matching error %d\n", rc);
return 1;
}
for (i = 0; i < rc; i++){
char *substring_start = src + ovector[2 * i];
int substring_length = ovector[2 * i + 1] - ovector[2 * i];
fprintf(stdout, "%2d: %.*s\n", i, substring_length, substring_start);
}
return 0;
}
run it
echo "apple banana africa" | ./program '\ba\w+\b'
and it print
0: apple
I've tried to use the PCRE_MULTILINE option,but no use.How to make it print all matchs?
It sounds like what you're looking for is the equivalent of the Perl /g regex flag to repeat the match as many times as possible and return the results of all the matches. I don't believe PCRE has anything like that.
Instead, you will need to add a loop around pcre_exec. Each time you call it, it will return the byte offset of the start and end of the match. You want to then run pcre_exec again on the string starting at the end of the match. Repeat until pcre_exec doesn't match.

How can I use PCRE to get all match groups?

I am inexperienced with using C, and I need to use PCRE to get matches.
Here is a sample of my source code:
int test2()
{
const char *error;
int erroffset;
pcre *re;
int rc;
int i;
int ovector[OVECCOUNT];
char *regex = "From:([^#]+)#([^\r]+)";
char str[] = "From:regular.expressions#example.com\r\n"\
"From:exddd#43434.com\r\n"\
"From:7853456#exgem.com\r\n";
re = pcre_compile (
regex, /* the pattern */
0, /* default options */
&error, /* for error message */
&erroffset, /* for error offset */
0); /* use default character tables */
if (!re) {
printf("pcre_compile failed (offset: %d), %s\n", erroffset, error);
return -1;
}
rc = pcre_exec (
re, /* the compiled pattern */
0, /* no extra data - pattern was not studied */
str, /* the string to match */
strlen(str), /* the length of the string */
0, /* start at offset 0 in the subject */
0, /* default options */
ovector, /* output vector for substring information */
OVECCOUNT); /* number of elements in the output vector */
if (rc < 0) {
switch (rc) {
case PCRE_ERROR_NOMATCH:
printf("String didn't match");
break;
default:
printf("Error while matching: %d\n", rc);
break;
}
free(re);
return -1;
}
for (i = 0; i < rc; i++) {
printf("%2d: %.*s\n", i, ovector[2*i+1] - ovector[2*i], str + ovector[2*i]);
}
}
In this demo, the output is only:
0: From:regular.expressions#example.com
1: regular.expressions
2: example.com
I want to output all of the matches; how can I do that?
I use a class to wrap PCRE to make this easier, but after the pcre_exec, the ovector contains the substring indexes you need to find the matches within the original string.
So it would be something like:
#include <string>
#include <iostream>
#include "pcre.h"
int main (int argc, char *argv[])
{
const char *error;
int erroffset;
pcre *re;
int rc;
int i;
int ovector[100];
char *regex = "From:([^#]+)#([^\r]+)";
char str[] = "From:regular.expressions#example.com\r\n"\
"From:exddd#43434.com\r\n"\
"From:7853456#exgem.com\r\n";
re = pcre_compile (regex, /* the pattern */
PCRE_MULTILINE,
&error, /* for error message */
&erroffset, /* for error offset */
0); /* use default character tables */
if (!re)
{
printf("pcre_compile failed (offset: %d), %s\n", erroffset, error);
return -1;
}
unsigned int offset = 0;
unsigned int len = strlen(str);
while (offset < len && (rc = pcre_exec(re, 0, str, len, offset, 0, ovector, sizeof(ovector))) >= 0)
{
for(int i = 0; i < rc; ++i)
{
printf("%2d: %.*s\n", i, ovector[2*i+1] - ovector[2*i], str + ovector[2*i]);
}
offset = ovector[1];
}
return 1;
}
note: last parameter of pcre_exec() must be element-count, not sizeof() ! ( http://www.pcre.org/readme.txt )

Resources