The reason why I would want to do this is because I want to read from a file line-by-line, and for each line check whether it matches a regex. I am using the getline() function, which puts the line into a char * type variable. I am trying to use regexec() to check for a regex match, but this function wants you to provide the string to match as a const char *.
So my question is, can I create a const char * from a char *? Or perhaps is there a better way to approach the problem I'm trying to solve here?
EDIT: I was asked to provide an example, which I should have included in the first place; apologies for that. I did read the answer by @chqrlie before writing this. The following code gives a segmentation fault.
#define _GNU_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <stdbool.h>
#include <regex.h>
/*
 * Listing from the question: reads "myfile" line by line with getline() and
 * prints each line for which regexec() reports a match against "a+b*".
 *
 * The char * buffer filled in by getline() is passed straight to regexec(),
 * whose parameter is a const char *.
 *
 * NOTE: apart from restoring the mis-encoded "&regex" arguments, this listing
 * is kept as posted. The unchecked fopen() result and the regcomp() flags
 * value of 0 are what the answer below discusses.
 */
int main() {
    FILE * file = fopen("myfile", "r");
    char * line = NULL;   /* buffer owned and grown by getline() */
    size_t len = 0;       /* current capacity of `line` */
    ssize_t read;         /* characters read by getline(), -1 when it stops */
    regex_t regex;
    const char * regexStr = "a+b*";
    if (regcomp(&regex, regexStr, 0)) {
        fprintf(stderr, "Could not compile regex \"%s\"\n", regexStr);
        exit(1);
    }
    while ((read = getline(&line, &len, file)) != -1) {
        int match = regexec(&regex, line, 0, NULL, 0);
        if (match == 0) {
            printf("%s matches\n", line);
        }
    }
    fclose(file);
    return 0;
}
char * can be converted to const char * without any special syntax. The const in this type means that the data pointed to by the pointer will not be modified via this pointer.
char array[] = "abcd"; // modifiable array of 5 bytes
char *p = array; // array can be modified via p
const char *q = p; // array cannot be modified via q
Here are some examples:
int strcmp(const char *s1, const char *s2);
size_t strlen(const char *s);
char *strcpy(char *dest, const char *src);
As you can see, strcmp does not modify the strings it receives pointers to, but you can of course pass regular char * pointers to it.
Similarly, strlen does not modify the string, and strcpy modifies the destination string but not the source string.
EDIT: Your problem has nothing to do with constness conversion:
You do not check the return value of fopen(), the program produces a segmentation fault on my system because myfile does not exist.
You must pass REG_EXTENDED to compile a regex with the newer syntax such as a+b*
Here is a corrected version:
#define _GNU_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
/*
 * Prints every line of "myfile" that contains a match for the extended
 * regular expression "a+b*". If "myfile" cannot be opened, a notice is
 * printed and standard input is read instead.
 *
 * Returns 0 on completion; exits with status 1 if the pattern does not
 * compile.
 */
int main() {
    FILE *file = fopen("myfile", "r");
    char *line = NULL;   /* buffer owned and grown by getline() */
    size_t len = 0;      /* current capacity of `line` */
    ssize_t nread;       /* characters returned by getline() */
    regex_t regex;
    const char *regexStr = "a+b*";

    if (file == NULL) {
        printf("cannot open myfile, using stdin\n");
        file = stdin;
    }
    if (regcomp(&regex, regexStr, REG_EXTENDED)) {
        fprintf(stderr, "Could not compile regex \"%s\"\n", regexStr);
        if (file != stdin) {
            fclose(file);
        }
        exit(1);
    }
    while ((nread = getline(&line, &len, file)) != -1) {
        /* getline() keeps the line terminator; drop it so the message
         * below stays on a single output line. */
        if (nread > 0 && line[nread - 1] == '\n') {
            line[nread - 1] = '\0';
        }
        if (regexec(&regex, line, 0, NULL, 0) == 0) {
            printf("%s matches\n", line);
        }
    }

    /* Release everything acquired above. */
    free(line);
    regfree(&regex);
    if (file != stdin) {
        fclose(file);
    }
    return 0;
}
Related
A simple program that takes an input file specified at the terminal, and alters the text to be reversed. How can the <stdio.h> functions be converted to only linux system calls? (I assume using only libraries like <unistd.h>)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Returns a newly allocated string consisting of str1 followed by str2.
 *
 * The caller owns the returned buffer and must free() it.
 * Returns NULL if the allocation fails.
 */
char* concat(const char *str1, const char *str2)
{
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);
    char *answer = malloc(len1 + len2 + 1);

    if (answer == NULL) {
        return NULL;
    }
    memcpy(answer, str1, len1);
    /* Copy str2 together with its terminating NUL. */
    memcpy(answer + len1, str2, len2 + 1);
    return answer;
}
/*
 * Runs the external "rev" command on the file named by argv[1].
 *
 * The file is opened first only to verify that it is readable; the actual
 * work is delegated to the shell through system().
 *
 * Exits with status 1 when no file name is given, when the file cannot be
 * opened, or when the command string cannot be built.
 */
int main(int argc, char** argv) {
    FILE * fp;
    char *command;

    if (argc < 2) {
        fputs("\nError : missing file name argument\n", stderr);
        exit(1);
    }
    fp = fopen(argv[1], "r");
    if (fp == NULL){
        perror("\nError ");
        exit(1);
    }

    /* NOTE(review): argv[1] is pasted into a shell command unquoted, so a
     * name containing spaces or shell metacharacters will be interpreted by
     * the shell. */
    command = concat("rev ", argv[1]);
    if (command == NULL) {
        fputs("\nError : out of memory\n", stderr);
        fclose(fp);
        exit(1);
    }
    system(command);
    free(command);
    fclose(fp);
    return 0;
}
Thank you for any help. Company only wants me to use system calls for some reason, this internship is not going great!
don't understand how to properly implement [read and write]
Assuming you mean call them (not implement them), the catch with read and write is that they may read or write less than requested, so you have to call them in a loop.
/* Write the whole string `str` to descriptor `fd`, retrying after short
 * writes. `fd` and `str` come from the surrounding (not shown) code. */
size_t to_write = strlen(str);
while (to_write) {
/* write() may accept fewer bytes than requested. */
ssize_t written = write(fd, str, to_write);
if (written < 0) {
perror(NULL);
exit(1);
}
/* Advance past the part that was written and retry with the remainder. */
str += written;
to_write -= written;
}
Reading works the same way if you know how much you need to read or if you're trying to read an entire file. (To read the entire file, read chunks until read returns 0. Factors of 8*1024 are nice chunk sizes.)
Otherwise, it gets far more complicated. How do you know how much to read before you read it? If you want to read a line, for example, you have no idea how long the line is until you encounter the terminating line feed. You could read a character at a time, but that's very inefficient. You could do what stdio does and use a buffer that holds the excess. At which point you might as well use stdio.
//#include<stdio.h> not used
#include<stdlib.h>
#include<string.h>
#include<unistd.h>
#include<sys/types.h>
#include<sys/stat.h>
#include<fcntl.h>
/*
 * Returns a newly allocated string consisting of `one` followed by `two`.
 * (Despite its name, this function concatenates; nothing is swapped.)
 *
 * The caller owns the returned buffer and must free() it.
 * Returns NULL if the allocation fails.
 */
char* swap(const char *one, const char *two)
{
    size_t first_len = strlen(one);
    size_t second_len = strlen(two);
    char *result = malloc(first_len + second_len + 1);

    if (result == NULL) {
        return NULL;
    }
    memcpy(result, one, first_len);
    /* Copy `two` together with its terminating NUL. */
    memcpy(result + first_len, two, second_len + 1);
    return result;
}
/* Writes the NUL-terminated message to standard error using write() only,
 * retrying after short writes and giving up silently on failure. */
static void put_error(const char *msg)
{
    size_t left = strlen(msg);

    while (left > 0) {
        ssize_t done = write(STDERR_FILENO, msg, left);
        if (done <= 0) {
            return;
        }
        msg += done;
        left -= (size_t)done;
    }
}

/*
 * Runs the external "rev" command on the file named by argv[1].
 *
 * The file is opened with open() first only to verify that it is readable.
 * Diagnostics are emitted with write() because <stdio.h> is deliberately not
 * included in this version.
 *
 * Exits with status 1 on a wrong argument count, an unreadable file, or an
 * allocation failure.
 */
int main(int argc, char** argv) {
    if (argc != 2){
        put_error("Error, wrong number of arguments!\n");
        exit(1);
    }

    int fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        put_error("Error, cannot open input file!\n");
        exit(1);
    }
    /* The descriptor is only an existence/readability probe. */
    close(fd);

    /* NOTE(review): argv[1] is pasted into a shell command unquoted. */
    char *result = swap("rev ", argv[1]);
    if (result == NULL) {
        put_error("Error, out of memory!\n");
        exit(1);
    }
    system(result);
    free(result);
    return 0;
}
I'm trying to write a function named read_line() to get data from a text file, line by line. After calling the function, the line should be written to the str pointer and the function should return the length of the line. Unfortunately, I end up getting null all the time.
/* readline.c*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "readline.h"
/*
 * Reads the first line of "0.txt", echoes it to standard output and returns
 * a length derived from it.
 *
 * str: received by value, so this function cannot hand the line back through
 *      it; the caller's pointer is left untouched. The line is therefore read
 *      into a private buffer that is released before returning.
 *
 * Returns the character count reported by getline() minus 2.
 * NOTE(review): the "- 2" presumably discounts a trailing "\r\n" - confirm
 * against the input file format.
 * Returns EXIT_FAILURE if the file cannot be opened and -1 if no line could
 * be read.
 */
int read_line(char *str)
{
    char *line = NULL;          /* allocated by getline() */
    size_t line_buf_size = 0;
    ssize_t line_size;
    int result;
    FILE *fp = fopen("0.txt", "r");

    (void)str;                  /* cannot be used to return the line */

    if (!fp)
    {
        fprintf(stderr, "Error opening file '%s'\n", "0.txt");
        return EXIT_FAILURE;
    }

    /* Get the first line of the file. */
    line_size = getline(&line, &line_buf_size, fp);
    fclose(fp);

    if (line_size < 0)
    {
        free(line);
        return -1;
    }

    /* Print through "%s": file contents must never be used as the format. */
    printf("%s", line);
    result = (int)(line_size - 2);
    free(line);
    return result;
}
/* main.c*/
#include "readline.h"
#include <stdio.h>
#include <stdlib.h>
/* Driver from the question: calls read_line() and prints what it believes
 * to be the line, followed by the returned number. */
int main(int argc, char **argv)
{
char *str = NULL;
int num;
/* str is passed by value, so read_line() receives a copy of the NULL
 * pointer and str itself is still NULL afterwards. */
num = read_line(str);
printf("%s", str);
printf("%d", num);
return 0;
}
Expected: get the content of the first line of the text file.
Actual: (null)12
In main, you have an object named str of type char*. read_line takes the value of the pointer as a parameter, but does not modify the pointer itself. This means that no matter what happens, the value of str will still be NULL when it reaches the printfs in main
One approach would be to pass a pointer to your pointer object, instead of its value.
/* Sketch of read_line() taking the address of the caller's pointer, so that
 * the buffer getline() stores through `str` is visible to the caller.
 * The "..." lines stand for code elided from the sketch. */
int read_line(char **str)
{
...
/* Get the first line of the file. */
line_size = getline(str, &line_buf_size, fp);
printf("%s", *str);
...
}
This way, str will be modified by getline
I just want to extract the particular word from the string.
My program is:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_SIZE 100
int main() {
FILE *f;
char buffer[100];
char buf[100];
int count=0;
char res[100];
f=fopen("1JAC.pdb","rb");
while(fgets(buffer,BUFFER_SIZE,f))
{
if(strncmp(buffer,"ATOM",4)==0 && strncmp(buffer+13,"CA",2)==0 && strncmp(buffer+21,"A",1)==0)
{
strcpy(buf,buffer);
}
printf (buf);
Output of the program is
ATOM 1033 CA LEU A 133 33.480 94.428 72.166 1.00 16.93 C
I just want to extract the word "LEU" using substring. I tried something like this:
Substring(17,3,buf);
But it doesn't work...
Could someone please tell about the substring in C.
Memcpy seems to be best way to do this ...
memcpy( destBuff, sourceBuff + 17, 3 );
destBuff[ 3 ] = '\0';
Please remember to add the null terminators if needed (as I have done in the example).
Also this has been answered before, several times on Stack-overflow
(Get a substring of a char*)
//Use the following substring function,it will help you.
/* Defined below; declared here so that main() does not call it through an
 * implicit declaration. */
char *substring(char *string, int position, int length);

/*
 * Reads "agg.txt" line by line and extracts a 3-character substring starting
 * at position 17 of every line.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main(int argc, char *argv[])
{
    FILE *filepointer;
    char string[1700];

    (void)argc;
    (void)argv;

    filepointer = fopen("agg.txt", "r");
    if (filepointer == NULL)
    {
        printf("Could not open agg.txt!\n");
        return 1;
    }
    while (fgets(string, sizeof(string), filepointer) != NULL)
    {
        char *temp = substring(string, 17, 3); /* here 17 is the start position and 3 is the length of the string to be extracted */
        /* substring() returns heap memory owned by the caller. */
        free(temp);
    }
    fclose(filepointer);
    return 0;
}
/*
 * Returns a newly allocated copy of up to `length` characters of `string`,
 * starting at the 1-based index `position`.
 *
 * The copy never reads past the end of `string`: if the requested range
 * extends beyond the terminator, the result is shortened (possibly to the
 * empty string). A NULL `string` or a non-positive `length` yields an empty
 * string, and a `position` below 1 is treated as 1.
 *
 * The caller owns the result and must free() it. If memory cannot be
 * allocated, a message is printed and the program exits with status 1.
 */
char *substring(char *string, int position, int length)
{
    const char *source = (string != NULL) ? string : "";
    char *pointer;
    int skipped;
    int c;

    if (length < 0)
    {
        length = 0;
    }

    pointer = malloc((size_t)length + 1);
    if (pointer == NULL)
    {
        printf("Unable to allocate memory.\n");
        exit(1);
    }

    /* Advance to the 1-based start position, stopping at the terminator. */
    for (skipped = 1; skipped < position && *source != '\0'; skipped++)
    {
        source++;
    }

    /* Copy at most `length` characters, again stopping at the terminator. */
    for (c = 0; c < length && source[c] != '\0'; c++)
    {
        pointer[c] = source[c];
    }
    pointer[c] = '\0';
    return pointer;
}
char out[4] = {0};
strncpy(out, buf+17, 3);
I've got a problem reading a couple of lines from a read-only FIFO. In particular, I have to read two lines — a number n, followed by a \n and a string str — and my C program should write str in a write-only FIFO for n times. This is my attempt.
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
char *readline(int fd);
/* Listing from the question: reads a count n and a string from the FIFO
 * named by argv[1], then writes the string n times, one per line, to the
 * FIFO named by argv[2] (created here with mkfifo()). Kept as posted. */
int main(int argc, char** argv) {
int in = open(argv[1], O_RDONLY);
mkfifo(argv[2], 0666);
int out = open(argv[2] ,O_WRONLY);
char *line = (char *) malloc(50);
int n;
/* This loop has no exit condition, so the close() calls after it are never
 * reached. */
while (1) {
/* First line: the repetition count. */
sscanf(readline(in), "%d", &n);
/* Second line: the text to repeat, copied into the 50-byte buffer. */
strcpy(line, readline(in));
int i;
for (i = 0; i < n; i++) {
write(out, line, strlen(line));
write(out, "\n", 1);
}
}
close(in);
close(out);
return 0;
}
/* Listing from the question: reads bytes one at a time from fd until a
 * newline or until read() returns 0, appending each byte to `line`, and
 * returns `line`. Kept as posted.
 * NOTE(review): `line` is an automatic array of this function, so the
 * returned pointer refers to storage whose lifetime ends on return. */
char *readline(int fd) {
/* One-byte heap buffer used as the read() target; it is never freed. */
char *c = (char *) malloc(1);
/* Not initialised before the first strcat() below. */
char line[50];
while (read(fd, c, 1) != 0) {
/* c holds a single byte with no terminating NUL, yet strcmp() and strcat()
 * both treat it as a string. */
if (strcmp(c, "\n") == 0) {
break;
}
strcat(line, c);
}
return line;
}
The code is working properly, but it puts a random number of newlines after the last string repetition. Also, this number changes at each execution.
Could someone please give me any help?
Besides the fact that reading character by character and comparing two characters using a "string" comparison are both far from efficient, readline() returns a pointer to memory that is local to readline(), namely line[50]. That memory is deallocated as soon as readline() returns, so accessing it afterwards invokes undefined behaviour.
One possibility to fix this is to declare the buffer to read the line into outside readline() and pass a reference to it down like so:
/*
 * Reads one line from descriptor fd into the caller-supplied buffer.
 *
 * fd:   descriptor to read from, one byte at a time.
 * line: destination buffer; must have room for size + 1 bytes.
 * size: maximum number of characters to store, not counting the terminating
 *       NUL (callers pass sizeof(buffer) - 1).
 *
 * Reading stops at a newline (consumed, not stored), at end of file, on a
 * read error, or once `size` characters have been stored; in the last case
 * the rest of the line is left unread. The result is always NUL-terminated.
 *
 * Returns `line`. Nothing is done when `line` is NULL or `size` is 0.
 */
char * readline(int fd, char * line, size_t size)
{
    if ((NULL != line) && (0 < size))
    {
        char c = 0;
        size_t i = 0;

        /* Test the capacity before reading so that no byte is consumed
         * and then thrown away when the buffer is already full. */
        while ((i < size) && (read(fd, &c, 1) > 0))
        {
            if ('\n' == c)
            {
                break;
            }
            line[i] = c;
            ++i;
        }
        line[i] = 0;
    }
    return line;
}
And then call it like this:
char * readline(int fd, char * line, size_t size);
int main(void)
{
...
char line[50] = "";
...
... readline(in, line, sizeof(line) - 1) ...
I have not tried running your code, but in your readline function you have not terminated the line with a null ('\0') character. Once you hit the '\n' character, you just break out of the while loop and return the string line. Try adding a '\0' character before returning from readline.
Click here for more info.
Your code did not work on my machine, and I'd say you're lucky to get any meaningful results at all.
Here are some problems to consider:
readline returns a pointer to a locally defined, fixed-size char buffer (line), which is destroyed when the function ends; the memory it once occupied is then free to be overwritten by other operations.
If line was not set to null bytes on allocation, strcat would treat its garbage values as characters, and could possibly try to write after its end.
You allocate a 1-byte buffer (c), I suspect, just because you need a char* in read. This is unnecessary (see the code below). What's worse, you do not deallocate it before readline exits, and so it leaks memory.
The while(1) loop would re-read the file and re-print it to the output fifo until the end of time.
You're using some "heavy artillery" - namely, strcat and memory allocation - where there are simpler approaches.
Last, some C standard versions may require that you declare all your variables before using them. See this question.
And here's how I modified your code. Note that, if the second line is longer than 50 characters, this code may also not behave well. There are techniques around the buffer limit, but I don't use any in this example:
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
char *readline(int fd, char * buffer);
/*
 * Reads two lines from the FIFO named by argv[1] - a count n and a string -
 * and writes the string n times, each followed by a newline, to the FIFO
 * named by argv[2] (created with mkfifo() if needed).
 *
 * Lines longer than the 50-byte buffer are not supported.
 *
 * Returns 0 on success, 1 on a usage error or when either FIFO cannot be
 * opened.
 */
int main(int argc, char** argv) {
    int in;
    int out;
    int n = 0;   /* stays 0 if the first line is not a number */
    int i;
    char line[50];

    if (argc < 3) {
        fprintf(stderr, "usage: prog INPUT_FIFO OUTPUT_FIFO\n");
        return 1;
    }

    in = open(argv[1], O_RDONLY);
    if (in < 0) {
        perror(argv[1]);
        return 1;
    }

    /* mkfifo() fails harmlessly when the FIFO already exists; any real
     * problem shows up in the open() below. */
    mkfifo(argv[2], 0666);
    out = open(argv[2] ,O_WRONLY);
    if (out < 0) {
        perror(argv[2]);
        close(in);
        return 1;
    }

    /* First line: the repetition count. */
    memset(line, 0, sizeof line);
    if (sscanf(readline(in, line), "%d", &n) != 1) {
        n = 0;
    }

    /* Second line: the text to repeat. The buffer is cleared first so that
     * no characters of the first line survive behind a shorter second line,
     * and readline() fills it in place (no self-overlapping strcpy()). */
    memset(line, 0, sizeof line);
    readline(in, line);

    for (i = 0; i < n; i++) {
        if (write(out, line, strlen(line)) < 0 || write(out, "\n", 1) < 0) {
            perror("write");
            break;
        }
    }
    close(in);
    close(out);
    return 0;
}
/*
 * Reads one line from descriptor fd into `buffer`, one byte at a time.
 *
 * Reading stops at a newline (consumed, not stored), at end of file, or on
 * a read error. The stored text is always NUL-terminated, so a short line
 * read into a previously used buffer does not keep stale trailing
 * characters.
 *
 * No capacity is passed in, so the caller must supply a buffer large enough
 * for the longest expected line plus the terminator.
 *
 * Returns `buffer`.
 */
char *readline(int fd, char * buffer) {
    char c;
    int counter = 0;

    /* "> 0" also ends the loop when read() fails with -1, instead of
     * storing an indeterminate byte forever. */
    while (read(fd, &c, 1) > 0) {
        if (c == '\n') {
            break;
        }
        buffer[counter++] = c;
    }
    buffer[counter] = '\0';
    return buffer;
}
This works on my box as you described. Compiled with GCC 4.8.2 .
I'm trying to do some pattern matching with GNU's regex.h. I've been able to reproduce what is happening with a simple example as follows:
#include <regex.h>
#include <stdio.h>
#include <string.h>
/* Listing from the question: compiles the pattern "[Mm]ark" with the GNU
 * re_compile_pattern() interface and tries to match it against "Mark" with
 * re_match(), printing what both calls return. Kept as posted. */
int main() {
char pat[] = "[Mm]ark";
char name[] = "Mark";
/* Only the four members assigned below are set before the struct is used.
 * NOTE(review): the remaining members are left uninitialised - check the
 * glibc regex documentation for which ones must be set before compiling. */
struct re_pattern_buffer pat_buff;
pat_buff.translate = 0;
pat_buff.fastmap = 0;
pat_buff.buffer = 0;
pat_buff.allocated = 0;
re_syntax_options = RE_SYNTAX_EGREP;
/* NOTE(review): glibc documents re_compile_pattern() as returning a
 * const char * error message (NULL on success); printing it with %d looks
 * mismatched - confirm. */
printf("Ret value from re_compile_pattern: %d\n", re_compile_pattern(pat, strlen(pat), &pat_buff));
printf("Ret value from re_match: %d\n", re_match(&pat_buff, name, strlen(name), 0, NULL));
regfree(&pat_buff);
return 0;
}
re_compile_pattern() returns 0 as expected.
re_match() returns -1 and this is contrary to what I would expect.
Can anyone give me any pointers or show me what I am missing?
I couldn't compile your code on my Fedora 10 box. Instead I tried reworking it to use the POSIX regcomp() and regexec() API as R.. suggested above. I know this doesn't quite answer your question regarding the GNU API, but the following code works for me:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <regex.h>
/* Pattern and input used when fewer than two arguments are given. */
char *default_pattern = "[Mm]ark";
char *default_string = "youMarkmywords";

/*
 * Reports whether an input string contains a match for a POSIX extended
 * regular expression.
 *
 * Usage: prog [PATTERN STRING]
 * When both arguments are present they replace the built-in defaults.
 *
 * A match is reported on standard output, a non-match on standard error.
 * Exits with EXIT_FAILURE if the pattern does not compile; otherwise
 * returns 0.
 */
int main(int argc, char **argv) {
    int ret;
    regex_t preg;
    const char *pattern = default_pattern;
    const char *string = default_string;

    if (argc >= 3) {
        pattern = argv[1];
        string = argv[2];
    }

    /* REG_NOSUB: only a yes/no answer is needed, no match offsets. */
    ret = regcomp(&preg, pattern, REG_EXTENDED|REG_NOSUB);
    if (0 != ret) {
        fprintf(stderr, "regcomp failed to compile %s\n", pattern);
        exit(EXIT_FAILURE);
    }

    ret = regexec(&preg, string, 0, NULL, 0);
    /* The input string goes in the first placeholder and the pattern in
     * the second, matching the wording of the messages. */
    if (0 == ret) {
        printf("Input \"%s\" matched pattern \"%s\"\n", string, pattern);
    } else {
        fprintf(stderr, "Input \"%s\" didn't match pattern \"%s\"\n", string, pattern);
    }

    regfree(&preg);
    return 0;
}
Make sure the intermediate steps, i.e. the compilation, succeed. You should probably pass strlen(pat) rather than sizeof pat to re_compile_pattern().