Working example of substitution using PCRS - c

I need to do substitution in a string in C. One of the answers to "How to do regex string replacements in pure C?" recommended using the PCRS library. I downloaded PCRS from ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib/ but I'm confused as to how to use it. Below is my code (taken from another SE post):
/*
 * Compile a pattern with PCRE, then walk the subject string and print every
 * match together with its capture groups. Each iteration resumes the search
 * at the end of the previous match.
 */
const char *error;
int erroffset;
pcre *re;
int rc;
int ovector[100];
char *regex = "From:([^#]+).*";
char str[] = "From:regular.expressions#example.com\r\n";
char stringToBeSubstituted[] = "gmail.com";
re = pcre_compile (regex, /* the pattern */
    PCRE_MULTILINE,
    &error, /* for error message */
    &erroffset, /* for error offset */
    0); /* use default character tables */
if (!re)
{
    printf("pcre_compile failed (offset: %d), %s\n", erroffset, error);
    return -1;
}
unsigned int offset = 0;
unsigned int len = strlen(str);
/* pcre_exec() wants the number of ints in ovector, not its size in bytes;
 * passing sizeof(ovector) would let it treat memory past the array as its own. */
while (offset < len && (rc = pcre_exec(re, 0, str, len, offset, 0, ovector, sizeof(ovector) / sizeof(ovector[0]))) >= 0)
{
    /* ovector[2*i] and ovector[2*i+1] are the start and end offsets of group i. */
    for(int i = 0; i < rc; ++i)
    {
        printf("%2d: %.*s\n", i, ovector[2*i+1] - ovector[2*i], str + ovector[2*i]);
    }
    offset = ovector[1];
}
/* Release the compiled pattern once matching is finished. */
pcre_free(re);
As opposed to 'pcre_compile' and 'pcre_exec' what functions do I need to use from PCRS?
Thanks.

Simply follow the instructions in the INSTALL file:
To build PCRS, you will need pcre 3.0 or later and gcc.
Installation is easy: ./configure && make && make install
Debug mode can be enabled with --enable-debug.
There is a simple demo application (pcrsed) included.
PCRS provides the following functions documented in the man page pcrs.3:
pcrs_compile
pcrs_compile_command
pcrs_execute
pcrs_execute_list
pcrs_free_job
pcrs_free_joblist
pcrs_strerror
Here's an online version of the man page. To use these functions, include the header file pcrs.h and link your program against the PCRS library using the linker flag -lpcrs.

Related

Compiling an already written C script. Visual Studios 2017

I have an already written script for C that I want to use to go along with Texmod. There was a post about it a long time ago but I can't access it. Basically it lets you use TexMod with arguments for the .exe like -log. I have downloaded Visual Studio 2017 and have tried compiling it from the developer console by cd'ing to the folder and then running cl 'script.c' to compile it. It makes an .exe and an .obj but does nothing past that, even when I double-click the .exe. The problem is I know Java and have never done anything in the C language. Here is the code:
#include <stdio.h>
#include <windows.h>
#include <tlhelp32.h>
UINT WINAPI EzGetPid(LPCSTR procName, UINT *pid, UINT size);
int main(int argc, char *argv[])
{
if (argc < 1) {
puts("You must specifie the arguments");
return 1;
}
UINT pid = 0;
if (!EzGetPid("Texmod.exe", &pid, 1)) {
puts("You must open Texmod first.");
return 1;
}
BYTE shellcode_tramp[] = "\x58\x6A\x00\x6A\x00\x68\x00\x00\x00\x00\xFF\xE0";
UINT size_tramp = 12;
char arguments[0x500] = {0};
strcpy(arguments, argv[1]);
HANDLE proc = OpenProcess(PROCESS_ALL_ACCESS, FALSE, pid);
LPVOID remote_tramp = VirtualAllocEx(proc, NULL, 0x1000, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
LPVOID remote_args = (LPVOID)((DWORD)remote_tramp + size_tramp);
*(DWORD*)(&shellcode_tramp[6]) = (DWORD)remote_args;
WriteProcessMemory(proc, remote_tramp, shellcode_tramp, size_tramp, NULL); // Write the trampoline
WriteProcessMemory(proc, remote_args, arguments, strlen(arguments), NULL); // Write the arguments
BYTE firstCall[] = "\xE8\x00\x00\x00\x00\x90";
BYTE secondCall[] = "\xE8\x00\x00\x00\x00\x90";
*(DWORD*)(&firstCall[1]) = (DWORD)remote_tramp - 0x4012E1 - 5;
*(DWORD*)(&secondCall[1]) = (DWORD)remote_tramp - 0x40145B - 5;
WriteProcessMemory(proc, (LPVOID)0x4012E1, firstCall, 6, NULL); // Write first detour call
WriteProcessMemory(proc, (LPVOID)0x40145B, secondCall, 6, NULL); // Write second detour call
CloseHandle(proc);
return 0;
}
/*
 * Collects the process IDs of running processes whose executable name equals
 * procName (case-sensitive strcmp).
 *
 * procName  executable file name to look for, e.g. "Texmod.exe"
 * pid       output array receiving the matching process IDs
 * size      capacity of pid, in elements
 *
 * Returns the number of IDs stored in pid (0 when nothing matched or the
 * process snapshot could not be taken).
 */
UINT WINAPI EzGetPid(LPCSTR procName, UINT *pid, UINT size)
{
    HANDLE hSnap = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
    if (hSnap == INVALID_HANDLE_VALUE)
        return 0;
    PROCESSENTRY32 buffer = {0};
    buffer.dwSize = sizeof(PROCESSENTRY32);
    UINT count = 0;
    /* The documented enumeration is Process32First once, then Process32Next. */
    if (Process32First(hSnap, &buffer)) {
        do {
            if (count < size && !strcmp(buffer.szExeFile, procName))
                pid[count++] = buffer.th32ProcessID;
        } while (count < size && Process32Next(hSnap, &buffer));
    }
    CloseHandle(hSnap);
    return count;
}
p.s. I added the last closed bracket as I thought I may have copied it wrong when I copied it long ago.
Do I need to use the Visual Studios interface to do this? I was wondering if someone that knows C could look at the code I'm trying to compile and help explain anything I am missing or any special instructions as to how to run it.
Thank you very much for all help.
Your program requires arguments in order to run correctly. The reason your program doesn't do anything is that, when you double-click it, you're not passing any arguments. The console closes before you can see the debug message "You must specifie the arguments".
Typically it's much easier to deal with trampoline hooks using an internal injected DLL method rather than this external method. You could start the process in a suspended state then inject your DLL to perform the trampoline hook. Then you can easily modify the function arguments of the function you're hooking.
Here is the code I use for x86 trampoline hooks
// Overwrites the first five bytes at src with a relative JMP (opcode 0xE9) to dst.
//
// src  start of the code to patch
// dst  address execution should be redirected to
// len  number of bytes at src whose protection is lifted; must be at least 5
//
// Returns false when len is too small or the page protection cannot be
// changed, true once the jump has been written.
bool Detour32(BYTE* src, BYTE* dst, const uintptr_t len)
{
    if (len < 5) return false;
    DWORD curProtection;
    // Writing to a page that could not be made writable would fault, so stop here.
    if (!VirtualProtect(src, len, PAGE_EXECUTE_READWRITE, &curProtection)) return false;
    // The displacement is relative to the instruction following the 5-byte JMP.
    uintptr_t relativeAddress = dst - src - 5;
    *src = 0xE9;
    // Store exactly four bytes: the JMP rel32 operand is 32 bits wide.
    *(DWORD*)(src + 1) = (DWORD)relativeAddress;
    // Put the original protection back.
    VirtualProtect(src, len, curProtection, &curProtection);
    return true;
}
// Installs a trampoline hook: the first len bytes of src are copied into a
// freshly allocated executable "gateway", followed by a JMP back into src, and
// src itself is then detoured to dst.
//
// src  function to hook
// dst  replacement function
// len  number of bytes to relocate from src; must be at least 5
//
// Returns the gateway, or 0 when len is too small, the gateway cannot be
// allocated, or the detour cannot be written.
BYTE* TrampHook32(BYTE* src, BYTE* dst, const uintptr_t len)
{
    if (len < 5) return 0;
    //Create Gateway: the stolen bytes plus a 5-byte JMP back to the original
    BYTE* gateway = (BYTE*)VirtualAlloc(0, len + 5, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
    if (!gateway) return 0;
    //write the stolen bytes to the gateway
    memcpy_s(gateway, len, src, len);
    //Displacement from the gateway's JMP (at gateway + len) back to src + len
    uintptr_t gatewayRelativeAddr = src - gateway - 5;
    // add the jmp opcode to the end of the gateway
    *(gateway + len) = 0xE9;
    //Write the 32-bit displacement after the jmp opcode
    *(DWORD*)(gateway + len + 1) = (DWORD)gatewayRelativeAddr;
    //Perform the detour; without it the gateway is useless, so release it
    if (!Detour32(src, dst, len)) {
        VirtualFree(gateway, 0, MEM_RELEASE);
        return 0;
    }
    return gateway;
}
Here is an example of how I use it to hook OpenGL's SwapBuffers function; in this example we will change the hDc argument to 1337.
typedef BOOL(__stdcall* twglSwapBuffers) (HDC hDc);
twglSwapBuffers owglSwapBuffers;
// Hook body with the same signature as the twglSwapBuffers typedef: it
// overwrites the incoming hDc with 1337 and forwards the call through the
// owglSwapBuffers function pointer.
// NOTE(review): owglSwapBuffers is assigned outside this function; presumably
// it holds the gateway returned by TrampHook32 - confirm at the call site.
BOOL __stdcall hkwglSwapBuffers(HDC hDc)
{
hDc = 1337;
return owglSwapBuffers(hDc);
}
owglSwapBuffers = (twglSwapBuffers)mem::TrampHook32((BYTE*)owglSwapBuffers, (BYTE*)hkwglSwapBuffers, 5);

JNI function returning illegal UTF characters at android

I'm trying to return a string from JNI to Android, but it contains illegal UTF characters and fails like this:
JNI DETECTED ERROR IN APPLICATION: input is not valid Modified UTF-8:
illegal start byte 0x80
04-12 16:08:09.899 18210-18372 A/art:art/runtime/runtime.cc:427]
string: '���� ���!��"��,"���"���#���$��%���
%��`&��'��H(���)��D*���*��X+��,���,���-��4.��|.��P/��t/���/��01��x1��
2��D2���2���3���4���5��06���6��9���9��;���;��H<��=��0=���=���>��8?��
Here is the code which I am using:
/*
 * JNI entry point that assembles a JSON-like string for one page in a heap
 * buffer and returns bytes from it as a Java byte[].
 *
 * pageNo  1-based page number (the code works with pageNo - 1 internally)
 * lang    passed through unchanged to get()
 *
 * NOTE(review): the closing brace of this function is missing from the
 * excerpt, and start_ayat, end_ayat, get(), replace() and checkUtfytes() are
 * not declared in the visible code.
 */
JNIEXPORT jbyteArray Java_pakdata_com_qurantextc_MainActivity_get(
JNIEnv *pEnv,
jobject this,
jint pageNo, jint lang) {
// Fixed 10000-byte output buffer; nothing below checks that the page fits in it.
char* buffer=(char*)malloc(10000); // this buffer contains the ayat
register unsigned int pageNumber = pageNo - 1;
// NOTE(review): header is never freed on either return path below.
char * header=(char*)malloc(1000);
sprintf(header,"[{\"OFFSET\":%d,\"DATA\":\"",pageNumber+1);
strcpy(buffer,header);
// Append one <div> per entry from start_ayat through end_ayat.
// NOTE(review): the quotes inside the format strings are not escaped
// consistently, so the emitted text may not be well-formed JSON.
for (int i = start_ayat; i <= end_ayat; i++) {
sprintf(buffer+strlen(buffer),"<div class=\\\"qr0\\\" data-ayat=\\\"%d\\\" id=\"%d\\\"><span>",i+1,i+1);
// get() is not shown; presumably it writes the text of entry i at the given
// position and NUL-terminates it, since the strlen() calls rely on that - TODO confirm.
get(lang, i, buffer + strlen(buffer)); // len is equal to length of buffer ( strlen() )
strcpy(buffer+strlen(buffer),"<\\/span><\\/div>");
}
// char* footer;
sprintf(buffer+strlen(buffer),"<div class=\\\"pagebreak\">%d<a id=\\\"%d\\\"next\\\"href=\\\"\\/page\\/%d\\\"></a><\\/div <\\/div>\"}]",pageNumber+1,pageNumber+1,pageNumber+1);
// The buffer is logged before any UTF-8 validation has run.
__android_log_print(ANDROID_LOG_DEBUG, "LOG_TAG","string: '%s'" , buffer);
int l = strlen(buffer);
// NOTE(review): c holds exactly l bytes, so strcpy() of any string of length l
// or more overruns it once the terminator is counted; assuming replace() does
// what its name suggests, swapping "\r" for "<br>" can only lengthen the text.
char c[l];
strcpy(c,replace(buffer,"\r","<br>"));
// The array is sized from the pre-replace length l, so only the first l bytes of c are copied.
jbyteArray ret = (*pEnv)->NewByteArray(pEnv,l);
(*pEnv)->SetByteArrayRegion (pEnv,ret, 0, l, c);
const char * errorKind = NULL;
uint8_t utf8 = checkUtfytes(c, &errorKind);
// Both branches are identical: the validation result does not change what is returned.
if (errorKind != NULL) {
free(buffer);
return ret;
} else {
free(buffer);
return ret;
}
I have tried using this too:
return = (*pEnv)->NewStringUTF(pEnv,buffer)
but it still contain illegal UTF characters..
Here is my android side code
byte[] ss = get(a, pos);
s= new String(ss,"UTF-8");
Still getting illegal UTF character error.
I have tried encoding on java side but its no help either,
I am posting here because all other methods that are written here i have already tried but it didn't worked.
PLEASE HELP!!!
I may be late, but your code seems to be correct; according to the JNI documentation, these characters are not supported. You have to handle it on the server side. Hope it helps.

Fuzzy regex match using TRE

I'm trying to use the TRE library in my C program to perform a fuzzy regex search. I've managed to piece together this code from reading the docs:
/* Compile an extended-syntax alternation and run a plain regexec() against
 * "January". nmatch and pmatch are both 0, so no match offsets are requested
 * and only the return code is kept. */
regex_t rx;
regcomp(&rx, "(January|February)", REG_EXTENDED);
int result = regexec(&rx, "January", 0, 0, 0);
However, this will match only an exact regex (i.e. no spelling errors are allowed). I don't see any parameter which allows to set the fuzziness in those functions:
int regcomp(regex_t *preg, const char *regex, int cflags);
int regexec(const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags);
How can I set the level of fuzziness (i.e. maximum Levenshtein distance), and how do I get the Levenshtein distance of the match?
Edit: I forgot to mention I'm using the Windows binaries from GnuWin32, which are available only for version 0.7.5. Binaries for 0.8.0 are available only for Linux.
Thanks to @Wiktor Stribiżew, I found out which function I need to use, and I've successfully compiled a working example:
#include <stdio.h>
#include "regex.h"
/*
 * Approximate-match demo: compiles "(January|February)" and matches the
 * misspelt "Janvary" against it with regaexec(), printing the cost of the
 * match.
 *
 * Every insertion, deletion and substitution is given a cost of 1, and each
 * per-kind limit as well as the total cost and error count is capped at 2.
 *
 * Returns 0 after a match attempt, 1 if the pattern fails to compile.
 */
int main() {
    regex_t rx;
    /* A failed regcomp() leaves rx unusable, so do not go on to match with it. */
    if (regcomp(&rx, "(January|February)", REG_EXTENDED) != 0) {
        printf("Failed to compile regex\n");
        return 1;
    }
    regaparams_t params = { 0 };
    params.cost_ins = 1;
    params.cost_del = 1;
    params.cost_subst = 1;
    params.max_cost = 2;
    params.max_del = 2;
    params.max_ins = 2;
    params.max_subst = 2;
    params.max_err = 2;
    /* No submatch positions are requested; only the cost field is read back. */
    regamatch_t match;
    match.nmatch = 0;
    match.pmatch = 0;
    if (!regaexec(&rx, "Janvary", &match, params, 0)) {
        printf("Levenshtein distance: %d\n", match.cost);
    } else {
        printf("Failed to match\n");
    }
    /* Release what regcomp() allocated for the pattern. */
    regfree(&rx);
    return 0;
}

How to make backtrace()/backtrace_symbols() print the function names?

The Linux specific backtrace() and backtrace_symbols() allows you to produce a call trace of the program. However, it only prints function addresses, not their names for my program. How can I make them print the function names as well ? I've tried compiling the program with -g as well as -ggdb. The test case below just prints this:
BACKTRACE ------------
./a.out() [0x8048616]
./a.out() [0x8048623]
/lib/libc.so.6(__libc_start_main+0xf3) [0x4a937413]
./a.out() [0x8048421]
----------------------
I'd want the first 2 items to also show the function names, foo and main
Code:
#include <execinfo.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
/*
 * Write all len bytes of buf to fd, continuing after short writes.
 *
 * A write() interrupted by a signal (EINTR) is retried without touching buf
 * or len, because nothing was written. Any other error abandons the
 * remaining data; the function has no way to report it.
 */
static void full_write(int fd, const char *buf, size_t len)
{
    while (len > 0) {
        ssize_t ret = write(fd, buf, len);

        if (ret == -1) {
            if (errno == EINTR)
                continue;       /* interrupted before writing: try again */
            break;
        }
        buf += (size_t) ret;
        len -= (size_t) ret;
    }
}
/*
 * Print the current call stack to stderr, one frame per line, framed by a
 * header line and a footer line.
 */
void print_backtrace(void)
{
    static const char start[] = "BACKTRACE ------------\n";
    static const char end[] = "----------------------\n";
    void *bt[1024];
    int bt_size;
    char **bt_syms;
    int i;

    bt_size = backtrace(bt, 1024);
    bt_syms = backtrace_symbols(bt, bt_size);
    full_write(STDERR_FILENO, start, strlen(start));
    if (bt_syms != NULL) {
        /* Start at 1 so the frame for print_backtrace() itself is left out. */
        for (i = 1; i < bt_size; i++) {
            size_t len = strlen(bt_syms[i]);
            full_write(STDERR_FILENO, bt_syms[i], len);
            full_write(STDERR_FILENO, "\n", 1);
        }
        /* backtrace_symbols() returns one malloc'ed block; a single free() releases it. */
        free(bt_syms);
    } else if (bt_size > 1) {
        /* The symbol array could not be allocated: fall back to the variant
         * that writes the same lines straight to the descriptor. */
        backtrace_symbols_fd(bt + 1, bt_size - 1, STDERR_FILENO);
    }
    full_write(STDERR_FILENO, end, strlen(end));
}
/* Adds one call level between main() and print_backtrace(), so the printed
 * trace contains a frame for foo as well as for main. */
void foo()
{
print_backtrace();
}
/* Test driver: calls foo(), which prints the backtrace, then exits with 0. */
int main()
{
foo();
return 0;
}
The symbols are taken from the dynamic symbol table; you need the -rdynamic option to gcc, which makes it pass a flag to the linker which ensures that all symbols are placed in the table.
(See the Link Options page of the GCC manual, and / or the Backtraces page of the glibc manual.)
Use the addr2line command to map executable addresses to source code filename+line number. Give the -f option to get function names as well.
Alternatively, try libunwind.
The excellent Libbacktrace by Ian Lance Taylor solves this issue. It handles stack unwinding and supports both ordinary ELF symbols and DWARF debugging symbols.
Libbacktrace does not require exporting all symbols, which would be ugly, and ASLR does not break it.
Libbacktrace was originally part of the GCC distribution. Now, a standalone version can be found on Github:
https://github.com/ianlancetaylor/libbacktrace
the answer on the top has a bug
if ret == -1 and errno is EINTR you should try again, but not count ret as copied
(not going to make an account just for this, if you don't like it tough)
/*
 * Write all len bytes of buf to fd, continuing after short writes.
 *
 * When write() fails with EINTR the call is simply repeated; the -1 return
 * value must not be added to buf or subtracted from len. Any other error
 * stops the loop and the remaining bytes are dropped.
 */
static void full_write(int fd, const char *buf, size_t len)
{
    while (len > 0) {
        ssize_t ret = write(fd, buf, len);
        if (ret == -1) {
            if (errno != EINTR)
                break;
            /* interrupted by a signal: retry with buf and len unchanged */
            continue;
        }
        buf += (size_t) ret;
        len -= (size_t) ret;
    }
}
Boost backtrace
Very convenient because it prints both:
unmangled C++ function names
line numbers
automatically for you.
Usage summary:
#define BOOST_STACKTRACE_USE_ADDR2LINE
#include <boost/stacktrace.hpp>
std::cout << boost::stacktrace::stacktrace() << std::endl;
I have provided a minimal runnable example for it and many other methods at: print call stack in C or C++

How can I parse a C string (char *) with flex/bison?

In my programming project I want to parse command line attributes using flex/bison. My program is called like this:
./prog -a "(1, 2, 3)(4, 5)(6, 7, 8)" filename
Is it possible to parse this string using flex/bison without writing it to a file and parsing that file?
See this question String input to flex lexer
I think you can achieve something like that (I did a similar thing) by using fmemopen to create a stream from a char* and then substituting that stream for stdin.
Something like that (not sure if it's fully functional since I'm actually trying to remember available syscalls but it would be something similar to this)
char* args = "(1,2,3)(4,5)(6,7,8)"
FILE *newstdin = fmemopen (args, strlen (args), "r");
FILE *oldstdin = fdup(stdin);
stdin = newstdin;
// do parsing
stdin = oldstdin;
Here is a complete flex example.
%%
<<EOF>> return 0;
. return 1;
%%
/* End-of-input callback for the scanner: unconditionally returns 1. */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}
/*
 * Scans the in-memory string "abcdef": yylex() is called repeatedly until it
 * returns 0, then the scan buffer is released.
 */
int main(int argc, const char* const argv[])
{
    YY_BUFFER_STATE bufferState = yy_scan_string("abcdef");

    /* This is a flex source; with yacc/bison, yyparse() would be called here
     * in place of the token loop. */
    while (yylex() != 0) {
        /* the token value itself is not used */
    }

    /* Hand the buffer back to flex so it is not leaked. */
    yy_delete_buffer(bufferState);
    return (EXIT_SUCCESS);
}
another example. this one redefines the YY_INPUT macro:
%{
int myinput (char *buf, int buflen);
char *string;
int offset;
#define YY_INPUT(buf, result, buflen) (result = myinput(buf, buflen));
%}
%%
[0-9]+ {printf("a number! %s\n", yytext);}
. ;
%%
/* Points the global `string` at the text to scan, then runs the scanner once;
 * the scanner obtains its input through myinput() via the YY_INPUT macro. */
int main () {
string = "(1, 2, 3)(4, 5)(6, 7, 8)";
yylex();
}
/*
 * Input callback used by YY_INPUT: copies up to buflen characters of the
 * global `string`, starting at the global `offset`, into buf.
 *
 * Copying stops early at the terminating NUL, which is stored in buf but not
 * counted. `offset` is advanced by the number of characters counted, and that
 * number is returned (0 once the end of the string has been reached).
 */
int myinput (char *buf, int buflen) {
    const char *src = string + offset;
    int copied = 0;

    while (copied < buflen) {
        char c = src[copied];

        buf[copied] = c;
        if (c == '\0') {
            break;
        }
        copied++;
    }
    offset += copied;
    return copied;
}
The answer is "Yes". See the O'Reilly publication called "lex & yacc", 2nd Edition by Doug Brown, John Levine, Tony Mason. Refer to Chapter 6, the section "Input from Strings".
I also just noticed that there are some good instructions in the section "Input from Strings", Chapter 5 of "flex and bison", by John Levine. Look out for routines yy_scan_bytes(char *bytes, int len), yy_scan_string("string"), and yy_scan_buffer(char *base, yy_size_t size). I have not scanned from strings myself, but will be trying it soon.

Resources