Replace all matches in pcre2_substitute in C - c

I replace the first occurrence of the match with pcre2_substitute:
#define PCRE2_CODE_UNIT_WIDTH 8
#include <stdio.h>
#include <string.h>
#include <pcre2.h>
int main(int argc, char **argv)
{
PCRE2_SPTR pattern = "(\\d+)";
PCRE2_SPTR subject = "1 something 849 for 993";
PCRE2_SPTR replacement = "XXX";
pcre2_code *re;
int errornumber;
int i;
int rc;
PCRE2_SIZE erroroffset;
PCRE2_SIZE *ovector;
size_t subject_length;
size_t replacement_length = strlen((char *)replacement);
pcre2_match_data *match_data;
subject_length = strlen((char *)subject);
PCRE2_UCHAR output[1024] = "";
PCRE2_SIZE outlen = sizeof(output) / sizeof(PCRE2_UCHAR);
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &errornumber, &erroroffset, NULL);
if (re == NULL)
{
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, buffer);
}
match_data = pcre2_match_data_create_from_pattern(re, NULL);
rc = pcre2_substitute(re, subject, subject_length, 0, 0, match_data, NULL, replacement,
replacement_length, output, &outlen);
printf("Output: %s", output);
return 0;
}
I know that I should repeat pcre2_substitute in a loop for replacing the next match, but I am not sure about the safest way to feed output as the subject of the next step.

You can replace all matches with a single call to pcre2_substitute by passing the option PCRE2_SUBSTITUTE_GLOBAL:
/* Same call as in the question, but with PCRE2_SUBSTITUTE_GLOBAL in the options so that all matches are replaced. */
rc = pcre2_substitute(re, subject, subject_length, 0,
PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED,
match_data, NULL, replacement, replacement_length, output, &outlen);

Related

EVP_DecryptUpdate is giving segmentation fault

In c while using the openssl/evp.h library.
If I am doing
EVP_CIPHER_CTX_new();
EVP_DecryptInit(ctx, EVP_aes_256_wrap_pad(), NULL, key, iv);
EVP_DecryptUpdate(ctx, buf, &cipher_len, 32);
I am getting a seg fault in the last line but when I change the EVP_aes_256_wrap_pad() to EVP_aes_128_ecb() and keeping all other parameters same seg fault is gone.
Here is an example code
Working
#include <openssl/evp.h>
#include <stdio.h>
#include <string.h>
/*
 * Prints the bytes of buf in hex as "buf = { xx, xx, }", leaving out the
 * run of zero bytes at the end of the buffer.
 */
void dump_head(unsigned char *buf, size_t len)
{
    unsigned used = len;
    unsigned k;

    /* Walk back from the end until a non-zero byte (or the start) is found. */
    while (used > 0 && buf[used - 1] == 0) {
        used--;
    }

    printf("buf = {");
    for (k = 0; k != used; k++) {
        printf(" %02hhx,", buf[k]);
    }
    printf(" }\n");
}
/*
 * Decrypts a fixed 32-byte buffer with AES-128-ECB and prints what the
 * update and final steps produced.
 *
 * Returns 0 when every EVP call succeeds, 1 otherwise.
 */
int main(void)
{
    unsigned char key[] = "0123456789abcdef";
    unsigned char iv[] = "1234567887654321";
    unsigned char indata[32] = "0123456789abcdeffedcba9876543210";
    /* Zeroed up front: dump_head scans the whole buffer for trailing zeros. */
    unsigned char buf[4096] = {0};
    int cipher_len = 0;
    int status = 1;
    EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();

    if (ctx == NULL) {
        fprintf(stderr, "EVP_CIPHER_CTX_new failed\n");
        return 1;
    }

    if (EVP_DecryptInit_ex(ctx, EVP_aes_128_ecb(), NULL, key, iv) != 1) {
        fprintf(stderr, "EVP_DecryptInit_ex failed\n");
    } else if (EVP_DecryptUpdate(ctx, buf, &cipher_len, indata, 32) != 1) {
        fprintf(stderr, "EVP_DecryptUpdate failed\n");
    } else {
        printf("Got %d\n", cipher_len);
        dump_head(buf, sizeof(buf));
        printf("Final!\n");
        memset(buf, 0, sizeof(buf));
        if (EVP_DecryptFinal_ex(ctx, buf, &cipher_len) != 1) {
            /* Report the failure instead of printing a stale length. */
            fprintf(stderr, "EVP_DecryptFinal_ex failed\n");
        } else {
            printf("Got %d\n", cipher_len);
            dump_head(buf, sizeof(buf));
            status = 0;
        }
    }

    EVP_CIPHER_CTX_free(ctx);
    return status;
}
Not working
#include <openssl/evp.h>
#include <stdio.h>
#include <string.h>
/*
 * Prints the bytes of buf in hex as "buf = { xx, xx, }", leaving out the
 * run of zero bytes at the end of the buffer.
 */
void dump_head(unsigned char *buf, size_t len)
{
    unsigned used = len;
    unsigned k;

    /* Walk back from the end until a non-zero byte (or the start) is found. */
    while (used > 0 && buf[used - 1] == 0) {
        used--;
    }

    printf("buf = {");
    for (k = 0; k != used; k++) {
        printf(" %02hhx,", buf[k]);
    }
    printf(" }\n");
}
/*
 * Same program as the "Working" listing, except that the cipher passed to
 * EVP_DecryptInit_ex is EVP_aes_128_wrap_pad(). This is the variant the
 * question reports as crashing in EVP_DecryptUpdate; it is kept as posted.
 */
int main(void)
{
unsigned char key[] = "0123456789abcdef";
unsigned char iv[] = "1234567887654321";
unsigned char indata[32] = "0123456789abcdeffedcba9876543210";
unsigned char buf[4096];
unsigned pos;
int cipher_len;
EVP_CIPHER_CTX *ctx;
ctx = EVP_CIPHER_CTX_new();
/* NOTE: per the answer below, a wrap-mode cipher needs
   EVP_CIPHER_CTX_set_flags(ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW) before this
   call. Without it the init fails, and the return value is not checked here. */
EVP_DecryptInit_ex(ctx, EVP_aes_128_wrap_pad(), NULL, key, iv);
/* Runs on a context whose initialisation failed; this is where the crash is reported. */
EVP_DecryptUpdate(ctx, buf, &cipher_len, indata, 32);
printf("Got %d\n", cipher_len);
dump_head(buf, sizeof(buf));
printf("Final!\n");
memset(buf, 0, sizeof(buf));
EVP_DecryptFinal_ex(ctx, buf, &cipher_len);
printf("Got %d\n", cipher_len);
dump_head(buf, sizeof(buf));
return 0;
}
Can you please suggest whether there is anything more that I have to add for EVP_aes_256_wrap_pad()?
It's not documented that I can find, and may not actually be supported, but to use a wrap-mode cipher in EVP_{Cipher,Encrypt,Decrypt}* you must first set a flag on the context:
EVP_CIPHER_CTX_set_flags(ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW);
and then use Init_ex as you did.
Since you didn't set this, your call to DecryptInit_ex failed, but you ignored the return code, and then called DecryptUpdate on an uninitialized context, which crashes. You should always check the return code from any OpenSSL routine that returns one.
However even when I set this flag, DecryptUpdate returns 0, I suspect because your data is not valid ciphertext for this algorithm (the keywrap algorithms include an integrity check), although I would expect it to put something in the errorstack and it doesn't. I will look more deeply at that if I have time.

Regular expression needs an extra whitespace at the end

Consider this code:
#include <pcre.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Repeatedly matches a word/quoted-phrase pattern against a fixed subject
 * and prints each match. Kept as posted: this is the listing that prints an
 * empty BUFFER for the subject "WORD".
 */
int main(int argc, char **argv) {
char **ret = NULL, **t;
char *buffer;
pcre *re;
const char *error;
int erroffset, rc = 1, arraylength = 0, ovector[2], i = 0;
const char *string = "WORD";
/* One byte more than the subject length is allocated here ... */
buffer = malloc(strlen(string)+1);
re = pcre_compile("[A-Za-z0-9]+|\\\"[A-Za-z0-9\\s\\.'\\?]+\\\"", PCRE_MULTILINE, &error, &erroffset, NULL);
/* A compile failure is only reported; execution continues with re == NULL. */
if (re == NULL ) printf ("pcre_compile error: %s\n", error);
while (rc > 0) {
/* i is the start offset; it is moved to the end of the previous match below. */
rc = pcre_exec(re, NULL, string, strlen(string), i, 0, ovector, 2);
bzero(buffer, strlen(string));
/* ... but the size passed here is strlen(string), without the extra byte.
   The answer below identifies this as the cause and passes strlen(string)+1. */
pcre_copy_substring(string, ovector, rc, 0, buffer, strlen(string));
if (rc > 0) {
printf("BUFFER: %s\n", buffer);
}
i = ovector[1];
}
return 0;
}
The output of this is:
[bart#localhost tests]$ ./pcre
BUFFER:
I expect to get WORD. When I added a whitespace character after WORD:
const char *string = "WORD ";
it does work:
[bart#localhost tests]$ ./pcre
BUFFER: WORD
Also, when I add more words, it works.
I tried to test my regular expression here, and it tells me it should work without the whitespace character.
What am I missing here?
Update When I change my regex to [A-Za-z0-9]+ it still doesn't work without the whitespace.
The problem was in pcre_copy_substring, the buffer was too small. This:
pcre_copy_substring(string, ovector, rc, 0, buffer, strlen(string)+1);
fixed it.

sendarp with multithreading function is not working for whole subnet (wifi environment)

I was trying to fetch the MAC addresses of all the devices present in a WiFi environment (the whole subnet).
Initially, when I ran the code, it took approximately 10 minutes to get the result for the whole subnet, so in order to reduce the time I used multithreading on a Windows machine, but this method is not working at all.
I am pasting the code snippet below.
I even tried different logic, like running 255, 100, 50, or 2 threads at a time, but it still failed.
I suspect a synchronization issue, but I don't have any idea how to resolve it, so please help me get this done.
/*
 * Thread entry point. lpParam is read as a C string holding a dotted IP
 * address; the function sends an ARP request for it. The result is not
 * stored anywhere yet (the success branch is a placeholder).
 * Always returns 0.
 */
DWORD WINAPI GetMacAddrOfSubNet(LPVOID lpParam)
{
    char target[100];
    ULONG hwAddr[2];      /* 8 bytes of storage for a 6-byte hardware address */
    ULONG hwAddrLen = 6;  /* length of the hardware address in bytes */
    IPAddr localIp = 0;   /* 0 selects the default source address */
    IPAddr remoteIp = 0;
    DWORD arpStatus;

    /* Take a private copy of the caller's string before doing anything else. */
    strcpy(target ,(char *)lpParam);
    remoteIp = inet_addr(target);
    memset(&hwAddr, 0xff, sizeof (hwAddr));

    /* Ask for the hardware address of remoteIp. */
    arpStatus = SendARP(remoteIp, localIp, &hwAddr, &hwAddrLen);
    if (arpStatus == NO_ERROR)
    {
        /* placeholder: nothing is done with the address yet */
    }
    return 0;
}
/*
 * Starts one GetMacAddrOfSubNet thread per host number 1..254 under the
 * prefix given in IpSubNetMask ("<prefix>.<k>"), then waits for the threads.
 * Kept as posted: this is the listing whose threading problem the answer
 * below explains.
 */
extern "C" __declspec(dllexport) int __cdecl PopulateARPTable(char *IpSubNetMask)
{
char ipn[100];
char buffer[10];
unsigned int k;
DWORD dwThreadIdArray[260];
HANDLE hThreadArray[260];
/*Run 255 threads at once*/
for (k=1; k<255; k++)
{
itoa(k, buffer, 10);
/* ipn is rebuilt in place on every iteration ... */
strcpy(ipn, IpSubNetMask);
strcat(ipn, ".");
strcat(ipn, buffer);
/*Thread creation */
/* ... and every thread receives a pointer to that same buffer, so a thread
   may read it after the next iteration has already overwritten it. */
hThreadArray[k] = CreateThread( NULL, 0, GetMacAddrOfSubNet, ipn, 0, &dwThreadIdArray[k]);
if (hThreadArray[k] == NULL)
{
//ExitProcess(3);
}
}
/* NOTE(review): the loop fills hThreadArray[1..254], but this call reads 255
   handles starting at index 0, which is never assigned. Presumably the handle
   count also exceeds what WaitForMultipleObjects accepts - confirm. */
WaitForMultipleObjects(255, hThreadArray, TRUE, INFINITE);
return 0;
}
The ipn buffer only exists once. You are using and modifying it as the parameter for each of your (up to) 255 threads. You expect that the statement strcpy(look ,(char *)lpParam) in the thread will be performed before the main thread modifies ipn again for the next thread. But this is not the case; at least, it is not guaranteed. So your threads may use the wrong parameters.
Either use different buffers for each thread, or implement a synchronization, that ensures that the parameter has been copied by the thread before main thread modifies the buffer again.
I've done this already for Excel, wrote a multi-threaded DLL that in addition to getting MAC addresses, will do DNS reverse hostname and ICMP ping RT.
Lemme know if you want the VBA code that goes with it. The source for the C-based DLL (which you may be happy with as a template) is:
/*
ExcelNet library: Provide threaded networking functions
NOTE: Serially doing these on the network involves serially waiting for timeouts, much ouchies!
Written by: Danny Holstein
*/
#if 1 // header stuff
#include <windows.h>
#include <iphlpapi.h>
#include <icmpapi.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#define MSEXPORT __declspec(dllexport)
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))
#define BUFSZ 256
// One lookup request and its result. A worker thread (tGetDATA) calls func on ip,
// copies the text it produced into data, and stores the function's return value in err_no.
typedef struct {
char ip[BUFSZ]; // IP addresses in dotted notation.
int ipSize; // size of IP address buffer
char data[BUFSZ]; // general network data (MAC, hostname, etc)
int dataSize; // size of data buffer ^
int err_no; // WinAPI error number
int (*func)(LPCSTR Addr, char* buf, int BufSz); // function address, &GetNameInfos, &GetARP or &GetICMP
} NET_DATA;
int GetARP(LPCSTR Addr, char* Buf, int BufSz);
int GetNameInfos(LPCSTR Addr, char* Buf, int BufSz);
int GetICMP(LPCSTR Addr, char* Buf, int BufSz);
// Scratch buffers for debug output: msg_dbg is written by the macros below, dan by their callers.
// Both are file-scope, so every caller (including the worker threads) shares them.
char msg_dbg[BUFSZ], dan[BUFSZ];
// DEBUG_PRT: pop up a message box showing the current line number and function name.
#define DEBUG_PRT() {snprintf(msg_dbg, BUFSZ, "lineno = %d\tfunc = %s", __LINE__ , __func__); MessageBox(0, msg_dbg, "debug", 0);}
// DEBUG_MSG: as DEBUG_PRT, with the caller's message text first. Expands to a brace block, not a single expression.
#define DEBUG_MSG(msg) {snprintf(msg_dbg, BUFSZ, "msg=\"%s\"\tlineno = %d\tfunc = %s", msg, __LINE__ , __func__); MessageBox(0, msg_dbg, "debug", 0);}
#if 0 // documentation indicates malloc/realloc/free shouldn't be used in DLLs
#define malloc(A) malloc(A)
#define realloc(A, B) realloc(A, B)
#define free(A) free(A)
// #define NOMEMLEAK // dudint work
#else // kinda works, when NOT allocating all the NET_DATA structure elements
// Map the CRT allocator names onto the process heap. Allocations are zero-filled
// and raise an exception on failure rather than returning NULL.
#define malloc(A) HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY | HEAP_GENERATE_EXCEPTIONS, (A))
#define realloc(A, B) HeapReAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, (A), (B))
// HeapFree takes (heap handle, flags, block). The block must be released on the same
// heap it was allocated from, i.e. the process heap used by malloc/realloc above.
#define free(A) (void) HeapFree(GetProcessHeap(), 0, (A))
#define NOMEMLEAK
#endif
#endif
// Test hook for passing arrays from VBA: reports the element counts of both arrays in a
// debug message box, then overwrites the leading elements of the string array with fixed
// sample strings.
//   IntArr - integer array; only its element count is used.
//   StrArr - array of BSTR; elements are reallocated in place.
MSEXPORT void __stdcall ArrayDGH(SAFEARRAY **IntArr, SAFEARRAY **StrArr)
{
    static const OLECHAR *const str[] = {L"Da",L"Fuk",L"Waz",L"Dat?",L"dot",L"dot"};
    const ULONG strCount = (ULONG) (sizeof (str) / sizeof (str[0]));
    SAFEARRAY *A = *IntArr;
    SAFEARRAY *S = *StrArr;
    ULONG n = A->rgsabound[0].cElements;
    ULONG sCount = S->rgsabound[0].cElements;

    snprintf(dan, BUFSZ, "a num elems = %lu, str num elems = %lu", n, sCount); DEBUG_MSG(dan);

    // Never write past the string array or read past the sample table.
    ULONG limit = MIN(MIN(n, sCount), strCount);
    for (ULONG i = 0; i < limit; i++) {
        SysReAllocString(&(((BSTR*) S->pvData)[i]), str[i]);
    }
}
// Timeout in milliseconds that GetICMP passes to IcmpSendEcho2. ExcelRngNetData has its
// own parameter of the same name, which hides this global inside that function.
DWORD dwMilliseconds = 10000;
// Exported setter for the timeout above.
MSEXPORT void __stdcall SetTIMO(DWORD Timo) {dwMilliseconds = Timo;}
// Looks up the MAC address for Addr (dotted IPv4 string) via GetARP.
//   MAC - receives a newly allocated byte string: the MAC text on success, otherwise
//         whatever message GetARP produced.
// Returns true when GetARP returned 0.
MSEXPORT bool __stdcall ExcelSendARP(LPCSTR Addr, BSTR* MAC)
{
    // Start as an empty string: GetARP does not check that its error formatting
    // wrote anything, and strlen below needs a terminated buffer either way.
    char buf[BUFSZ] = "";
    int err = GetARP(Addr, buf, BUFSZ);
    *MAC = SysAllocStringByteLen(buf, (UINT) strlen(buf)); // avoid WIDE to ANSI stuff
    return !err;
}
// Measures the ICMP round-trip time to Addr (dotted IPv4 string) via GetICMP.
//   RoundTrip - receives a newly allocated byte string: the time text on success,
//               otherwise whatever message GetICMP produced.
// Returns true when GetICMP returned 0.
MSEXPORT bool __stdcall ExcelICMPRT(LPCSTR Addr, BSTR* RoundTrip)
{
    // Start as an empty string: GetICMP does not check that its error formatting
    // wrote anything, and strlen below needs a terminated buffer either way.
    char buf[BUFSZ] = "";
    int err = GetICMP(Addr, buf, BUFSZ);
    *RoundTrip = SysAllocStringByteLen(buf, (UINT) strlen(buf)); // avoid WIDE to ANSI stuff
    return !err;
}
// Resolves the host name for Addr (dotted IPv4 string) via GetNameInfos.
//   NameInfo - receives a newly allocated byte string: the host name on success,
//              otherwise whatever message GetNameInfos produced.
// Returns true when GetNameInfos returned 0.
MSEXPORT bool __stdcall ExcelGetNameInfo(LPCSTR Addr, BSTR* NameInfo)
{
    // Start as an empty string: GetNameInfos does not check that its error formatting
    // wrote anything, and strlen below needs a terminated buffer either way.
    char buf[BUFSZ] = "";
#ifdef TEST_FUNC_PTR
    // Same call made through a table of function pointers; the pointer type has to
    // match the lookup functions' actual signature.
    int (*FunAddr[2])(LPCSTR Addr, char* buf, int BufSz);
    FunAddr[0] = &GetARP; FunAddr[1] = &GetNameInfos; // DRY code hooks
    int err = (*(FunAddr[1]))(Addr, buf, BUFSZ);
#else
    int err = GetNameInfos(Addr, buf, BUFSZ);
#endif
    *NameInfo = SysAllocStringByteLen(buf, (UINT) strlen(buf)); // avoid WIDE to ANSI stuff
    return !err;
}
// Reverse lookup: writes the host name registered for Addr (dotted IPv4 string) into buf.
//   buf/BufSz - output buffer and its size; holds the host name on success, otherwise
//               the text FormatMessageA produced for the error (or a fixed message when
//               Addr cannot be parsed).
// Returns 0 on success, INADDR_NONE converted to int for an unparsable address,
// otherwise the WSAGetLastError code.
int GetNameInfos(LPCSTR Addr, char* buf, int BufSz)
{
    struct in_addr hostAddr = { 0 };
    HOSTENT *entry;
    DWORD wsaErr;
    ULONG ipv4 = inet_addr(Addr);

    if (ipv4 == INADDR_NONE) {
        if (strcpy_s(buf, BufSz, "inet_addr failed and returned INADDR_NONE")) { DEBUG_MSG("strcpy_s error!"); }
        return ipv4;
    }

    hostAddr.s_addr = ipv4;
    entry = gethostbyaddr((char *) &hostAddr, 4, AF_INET);
    if (entry != NULL) {
        // a host name is associated with this IP
        _snprintf_s(buf, BufSz-1, _TRUNCATE, "%s", entry->h_name);
        return 0;
    }

    // no answer for this IP: put the decoded error text in buf
    wsaErr = WSAGetLastError();
    FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, 0, wsaErr, 0, buf, BufSz, 0);
    return wsaErr;
}
// ARP lookup: writes the MAC address for Addr (dotted IPv4 string) into buf as
// "xx:xx:xx:xx:xx:xx".
//   buf/BufSz - output buffer and its size; holds the MAC text on success, otherwise
//               the text FormatMessageA produced for the error (or a fixed message when
//               Addr cannot be parsed).
// Returns 0 on success, INADDR_NONE converted to int for an unparsable address,
// otherwise the SendARP error code.
int GetARP(LPCSTR Addr, char* buf, int BufSz)
{
#define BUFLEN 6
    ULONG macWords[BUFLEN];
    ULONG macLen = BUFLEN;
    ULONG ipv4 = inet_addr(Addr);
    DWORD arpStatus;
    UCHAR *mac;

    if (ipv4 == INADDR_NONE) {
        if (strcpy_s(buf, BufSz, "inet_addr failed and returned INADDR_NONE")) { DEBUG_MSG("strcpy_s error!"); }
        return ipv4;
    }

    arpStatus = SendARP(ipv4, 0, macWords, &macLen);
    if (arpStatus != 0) {
        // no answer for this IP: put the decoded error text in buf
        FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, 0, arpStatus, 0, buf, BufSz, 0);
        return arpStatus;
    }

    // format the first six bytes of the reply
    mac = (UCHAR *) macWords;
    _snprintf_s(buf, BufSz, _TRUNCATE, "%02x:%02x:%02x:%02x:%02x:%02x",
                mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
    return 0;
}
// ICMP echo: writes the round-trip time in milliseconds to Addr (dotted IPv4 string)
// into buf as decimal text. Waits up to the global dwMilliseconds for a reply.
//   buf/BufSz - output buffer and its size; holds the time on success, otherwise an
//               error description.
// Returns 0 on success, INADDR_NONE converted to int for an unparsable address,
// otherwise a non-zero error code.
int GetICMP(LPCSTR Addr, char* buf, int BufSz)
{
    char SendData[] = "Data Buffer";
    // Space for a single reply: the reply header, the echoed payload and 8 spare bytes.
    // Kept on the stack so there is nothing to release on any return path.
    struct {
        ICMP_ECHO_REPLY reply;
        char payload[sizeof (SendData) + 8];
    } ReplyBuffer;
    HANDLE hIcmpFile;
    ULONG inet;
    DWORD resp;

    if ((inet = inet_addr(Addr)) == INADDR_NONE) {
        if (strcpy_s(buf, BufSz, "inet_addr failed and returned INADDR_NONE")) { DEBUG_MSG("strcpy_s error!"); }
        return inet;
    }

    hIcmpFile = IcmpCreateFile();
    if (hIcmpFile == INVALID_HANDLE_VALUE) {
        resp = GetLastError();
        FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, 0, resp, 0, buf, BufSz, 0);
        return resp;
    }

    memset(&ReplyBuffer, 0, sizeof (ReplyBuffer));
    resp = IcmpSendEcho2(hIcmpFile, NULL, NULL, NULL,
                         inet, SendData, sizeof (SendData), NULL,
                         &ReplyBuffer, sizeof (ReplyBuffer), dwMilliseconds);
    if (resp == 0) { // no reply stored for this IP: decode the reason and place it in buf
        // Read the error before any other API call can overwrite it.
        resp = GetLastError();
        if (resp == NO_ERROR) resp = ReplyBuffer.reply.Status;
        // Callers treat 0 as success, so a failure must never be reported as 0.
        if (resp == NO_ERROR) resp = ERROR_GEN_FAILURE;
        if (FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, 0, resp, 0, buf, BufSz, 0) == 0) {
            // No system message text for this code: fall back to the number.
            _snprintf_s(buf, BufSz, _TRUNCATE, "ICMP error %lu", resp);
        }
        IcmpCloseHandle(hIcmpFile);
        return resp;
    }

    // place roundtrip time in buf (ASCII representation)
    _snprintf_s(buf, BufSz-1, _TRUNCATE, "%lu", ReplyBuffer.reply.RoundTripTime);
    IcmpCloseHandle(hIcmpFile);
    return 0; // we have a RT time in milliseconds associated with this IP, SUCCESS!
}
NET_DATA *NetData;
// Thread entry point: runs the lookup function stored in one NET_DATA record on that
// record's IP string, then stores the return code and the produced text back into the
// record. The parameter is the address of a record owned by the caller.
// Always returns 0.
DWORD WINAPI tGetDATA(LPVOID NetData)
{
    NET_DATA *job = NetData;
    char result[BUFSZ];
    int status;

    status = job->func(job->ip, result, BUFSZ); // GetARP, GetNameInfos or GetICMP
    job->err_no = status;

    // The record may already have been released by the caller if this thread
    // finishes after the caller stopped waiting.
    if (_snprintf_s(job->data, job->dataSize-1, _TRUNCATE, "%s", result) == -1) { DEBUG_MSG("_snprintf_s error!"); }
    return 0;
}
// Batch lookup for a list of addresses.
//   list           - newline-separated IP address strings
//   ErrNos         - receives one error number per address
//   DataArry       - receives one result string per address
//   dwMilliseconds - how long to wait for all worker threads (hides the global of the same name)
//   sep            - separator placed between results in the returned string; 0 for none
//   FunctType      - index into FuncAddr: 0 = GetARP, 1 = GetNameInfos, 2 = GetICMP
// Returns a newly allocated byte-string BSTR with the successful results joined by sep.
MSEXPORT BSTR __stdcall ExcelRngNetData(UCHAR *list, ULONG *ErrNos, BSTR *DataArry, DWORD dwMilliseconds, char sep, char FunctType)
{ // take list of IP addresses (newline-separated), create thread for each to get network data. Return results in (sep-separated) BSTR.
int i=0, j, NumIPs=0, k = strlen(list), l=0;
#define NMSZ 100
char nm[NMSZ];
void* FuncAddr[3]; // DRY code hooks
FuncAddr[0] = &GetARP; FuncAddr[1] = &GetNameInfos; FuncAddr[2] = &GetICMP;
// First pass over the list: only counts the entries.
while (i<k && sscanf_s(list+i, "%[^\n]\n", nm, NMSZ)){i += strnlen_s(nm, NMSZ) + 1; NumIPs++;} // count newline-separated items before doing malloc(), because realloc() is REALLY resource intensive
NetData = malloc(NumIPs * sizeof(NET_DATA)); i = 0;
// Second pass: fills one NET_DATA record per entry.
while (i<k && sscanf_s(list+i, "%[^\n]\n", nm, NMSZ)){ // load calling routines list into structures
j = strnlen_s(nm, NMSZ) + 1; i += j;
NetData[l].err_no = WAIT_TIMEOUT; // easy way to preset the error to a TIMEOUT, if the thread successfully completes before the timeout, this will be set to reflect its timeout
NetData[l].dataSize = BUFSZ;
// NOTE(review): FunctType is used as an index without a range check.
NetData[l].func = FuncAddr[FunctType]; // DRY (Don't Repeat Yourself) code hook
if (strncpy_s(NetData[l].ip, (NetData[l].ipSize = BUFSZ), nm, j)) DEBUG_MSG("strcpy_s error!");
l++;
}
// One worker thread per record; each thread gets the address of its own record.
HANDLE *tHandles = malloc(NumIPs * sizeof(HANDLE)); DWORD ThreadId;
for (i=0; i<NumIPs; i++){
tHandles[i] = CreateThread(NULL, 0, tGetDATA, &(NetData[i]), 0, &ThreadId);
if (tHandles[i] == NULL) {DEBUG_MSG("Could not create threads!\nExiting now"); ExitProcess(3);}
}
// Only WAIT_FAILED is handled here; any other result, including a timeout, falls through.
if(WaitForMultipleObjects(NumIPs, tHandles, TRUE, dwMilliseconds) == WAIT_FAILED) {
FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, 0, GetLastError(), 0, dan, BUFSZ, 0); DEBUG_MSG(dan);
// Prolly means too many threads, kicked in when I exceeded 64; ERROR_INVALID_PARAMETER = "The parameter is incorrect"
}
#define CHUNK 1024
#define DATASZ 256 // tested with intentionally small size to check "safe" string functions
wchar_t wcs[DATASZ]; char MacChunk[DATASZ];
// ans is the growing result string: anslen is the number of characters used, anssz the allocated size.
char *ans = malloc(CHUNK); int anslen = 0, anssz = CHUNK;
char separator[2]; separator[0] = sep; separator[1] = 0;
for(i=0; i<NumIPs; i++) { // build return BSTR and load array with data
CloseHandle(tHandles[i]); ErrNos[i] = NetData[i].err_no;
// Records whose err_no is non-zero contribute an empty string to the joined result.
if (strncpy_s(MacChunk, DATASZ-1, NetData[i].err_no == 0 ? NetData[i].data : "", NetData[i].dataSize-1)) DEBUG_MSG("strcpy_s error!");
if (i<NumIPs-1 && sep != 0) if (strncat_s(MacChunk, DATASZ-1, separator, 1)) DEBUG_MSG("strcpy_s error!");
while (strnlen_s(MacChunk, DATASZ) > anssz - anslen -1) ans = realloc(ans, (anssz += CHUNK)); // choose CHUNK size to avoid constant realloc(), because realloc() is REALLY resource intensive
if (strncpy_s(&(ans[anslen]), DATASZ-1, MacChunk, DATASZ-1)) DEBUG_MSG("strcpy_s error!"); anslen += strnlen_s(MacChunk, DATASZ); // return data in returned BSTR
MultiByteToWideChar(CP_UTF8, 0,
ErrNos[i] == WAIT_TIMEOUT ? "The wait operation timed out." : NetData[i].data,
-1, wcs, DATASZ-1); // return data in supplied array
SysReAllocString(&DataArry[i], wcs);
}
BSTR r = SysAllocStringByteLen(ans, strlen(ans));
#ifdef NOMEMLEAK
// NOTE(review): after a timed-out wait, worker threads may still hold pointers into
// NetData when it is released here - confirm.
free(NetData); free(tHandles); free(ans);
#endif
return r;
}

pcre C API only return first match

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pcre.h>
#define OVECCOUNT 30
#define SRCBUFFER 1024*1024
int main(int argc, char **argv){
pcre *re;
const char *error;
int erroffset;
int ovector[OVECCOUNT];
int rc, i;
if (argc != 2){
fprintf(stderr, "Usage : %s PATTERN\n", argv[0]);
return 1;
}
char *src=malloc(SRCBUFFER);
int srclen = fread(src, sizeof(char), SRCBUFFER, stdin);
re = pcre_compile(argv[1], 0, &error, &erroffset, NULL);
if (re == NULL){
fprintf(stderr, "PCRE compilation failed at offset %d: %s\n", erroffset, error);
return 1;
}
rc = pcre_exec(re, NULL, src, srclen, 0, 0, ovector, OVECCOUNT);
if (rc < 0){
if (rc == PCRE_ERROR_NOMATCH) fprintf(stderr, "Sorry, no match...\n");
else fprintf(stderr, "Matching error %d\n", rc);
return 1;
}
for (i = 0; i < rc; i++){
char *substring_start = src + ovector[2 * i];
int substring_length = ovector[2 * i + 1] - ovector[2 * i];
fprintf(stdout, "%2d: %.*s\n", i, substring_length, substring_start);
}
return 0;
}
run it
echo "apple banana africa" | ./program '\ba\w+\b'
and it print
0: apple
I've tried using the PCRE_MULTILINE option, but it made no difference. How can I make it print all matches?
It sounds like what you're looking for is the equivalent of the Perl /g regex flag to repeat the match as many times as possible and return the results of all the matches. I don't believe PCRE has anything like that.
Instead, you will need to add a loop around pcre_exec. Each time you call it, it will return the byte offset of the start and end of the match. You want to then run pcre_exec again on the string starting at the end of the match. Repeat until pcre_exec doesn't match.

How can I use PCRE to get all match groups?

I am inexperienced with using C, and I need to use PCRE to get matches.
Here is a sample of my source code:
/*
 * Compiles a "From:<user>#<host>" pattern, matches it once against a fixed
 * three-line string and prints the whole match and both captured groups.
 * Only the first match in the string is reported.
 *
 * Returns 0 when a match was printed, -1 if the pattern does not compile,
 * nothing matches, or matching fails.
 */
int test2()
{
    const char *error;
    int erroffset;
    pcre *re;
    int rc;
    int i;
    int ovector[OVECCOUNT];
    const char *regex = "From:([^#]+)#([^\r]+)";
    char str[] = "From:regular.expressions#example.com\r\n"\
                 "From:exddd#43434.com\r\n"\
                 "From:7853456#exgem.com\r\n";

    re = pcre_compile (
        regex,       /* the pattern */
        0,           /* default options */
        &error,      /* for error message */
        &erroffset,  /* for error offset */
        0);          /* use default character tables */
    if (!re) {
        printf("pcre_compile failed (offset: %d), %s\n", erroffset, error);
        return -1;
    }

    rc = pcre_exec (
        re,          /* the compiled pattern */
        0,           /* no extra data - pattern was not studied */
        str,         /* the string to match */
        strlen(str), /* the length of the string */
        0,           /* start at offset 0 in the subject */
        0,           /* default options */
        ovector,     /* output vector for substring information */
        OVECCOUNT);  /* number of elements in the output vector */
    if (rc < 0) {
        switch (rc) {
        case PCRE_ERROR_NOMATCH:
            printf("String didn't match");
            break;
        default:
            printf("Error while matching: %d\n", rc);
            break;
        }
        /* A compiled pattern is released with pcre_free, not free. */
        pcre_free(re);
        return -1;
    }

    for (i = 0; i < rc; i++) {
        printf("%2d: %.*s\n", i, ovector[2*i+1] - ovector[2*i], str + ovector[2*i]);
    }

    pcre_free(re);
    return 0;
}
In this demo, the output is only:
0: From:regular.expressions#example.com
1: regular.expressions
2: example.com
I want to output all of the matches; how can I do that?
I use a class to wrap PCRE to make this easier, but after the pcre_exec, the ovector contains the substring indexes you need to find the matches within the original string.
So it would be something like:
#include <string>
#include <iostream>
#include "pcre.h"
int main (int argc, char *argv[])
{
const char *error;
int erroffset;
pcre *re;
int rc;
int i;
int ovector[100];
char *regex = "From:([^#]+)#([^\r]+)";
char str[] = "From:regular.expressions#example.com\r\n"\
"From:exddd#43434.com\r\n"\
"From:7853456#exgem.com\r\n";
re = pcre_compile (regex, /* the pattern */
PCRE_MULTILINE,
&error, /* for error message */
&erroffset, /* for error offset */
0); /* use default character tables */
if (!re)
{
printf("pcre_compile failed (offset: %d), %s\n", erroffset, error);
return -1;
}
unsigned int offset = 0;
unsigned int len = strlen(str);
while (offset < len && (rc = pcre_exec(re, 0, str, len, offset, 0, ovector, sizeof(ovector))) >= 0)
{
for(int i = 0; i < rc; ++i)
{
printf("%2d: %.*s\n", i, ovector[2*i+1] - ovector[2*i], str + ovector[2*i]);
}
offset = ovector[1];
}
return 1;
}
note: last parameter of pcre_exec() must be element-count, not sizeof() ! ( http://www.pcre.org/readme.txt )

Resources