Get secondary storage details using C on Linux

I need a simple way to get secondary storage details (total size, used and free space) in a (daemon) C program for Linux.
These are the things I tried:
statvfs - I don't know how to get details for a whole disk instead of for a single file path.
Using system("df -h --total | grep total > disk.stat") in the C code and then reading the file.
But the above involves a file write and read, which is not efficient, because this C code is a daemon that polls the system details continuously as input for graph generation.
If there is no other way, please suggest a simple and fast IPC mechanism, with an example, for communication between the bash command and the C code.
/*
 * @brief Returns the total percentage of secondary storage used.
 *
 * - uses a bash command to get the storage data and store it in a file
 * - then uses C code to retrieve the percentage of usage from the file and return it
 */
int calculate_storage_size(void)
{
    if (system("df -h --total | grep total > disk.stat") >= 0)
    {
        char *temp_char_ptr = NULL;
        int storage_size_percent = -1;
        FILE *fp;

        fp = fopen("disk.stat", "r");
        if (fp != NULL)
        {
            temp_char_ptr = calloc(6, 1);
            /* skip the first four fields, then read the "Use%" column */
            fscanf(fp, "%5s %5s %5s %5s %d", temp_char_ptr, temp_char_ptr,
                   temp_char_ptr, temp_char_ptr, &storage_size_percent);
            fclose(fp);   /* only close what was successfully opened */
        }
        free(temp_char_ptr);
        return storage_size_percent;
    }
    return -1;
}

I would suggest letting the user specify which mounts should be considered in the total, or using a heuristic to omit system and temporary mounts.
Consider the following example, info.c:
#define _POSIX_C_SOURCE 200809L
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/statvfs.h>
#include <mntent.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
static void free_array(char **array)
{
if (array) {
size_t i;
for (i = 0; array[i] != NULL; i++) {
free(array[i]);
array[i] = NULL;
}
free(array);
}
}
static char **normal_mounts(void)
{
char **list = NULL, **temp;
size_t size = 0;
size_t used = 0;
char buffer[4096];
struct mntent entry;
FILE *mounts;
mounts = fopen("/proc/mounts", "r");
if (!mounts)
return NULL;
while (getmntent_r(mounts, &entry, buffer, sizeof buffer) == &entry)
if (strcmp(entry.mnt_fsname, "tmpfs") &&
strcmp(entry.mnt_fsname, "swap") &&
strcmp(entry.mnt_dir, "/proc") && strncmp(entry.mnt_dir, "/proc/", 6) &&
strcmp(entry.mnt_dir, "/boot") && strncmp(entry.mnt_dir, "/boot/", 6) &&
strcmp(entry.mnt_dir, "/sys") && strncmp(entry.mnt_dir, "/sys/", 5) &&
strcmp(entry.mnt_dir, "/run") && strncmp(entry.mnt_dir, "/run/", 5) &&
strcmp(entry.mnt_dir, "/dev") && strncmp(entry.mnt_dir, "/dev/", 5) &&
strcmp(entry.mnt_dir, "/mnt") && strncmp(entry.mnt_dir, "/mnt/", 5) &&
strcmp(entry.mnt_dir, "/media") && strncmp(entry.mnt_dir, "/media/", 7) &&
strcmp(entry.mnt_dir, "/var/run") && strncmp(entry.mnt_dir, "/var/run/", 9)) {
if (used + 1 >= size) { /* +1: always keep room for a NULL terminator */
size = (used | 15) + 17;
temp = realloc(list, size * sizeof list[0]);
if (!temp) {
endmntent(mounts);
free_array(list);
errno = ENOMEM;
return NULL;
}
list = temp;
}
if (!(list[used++] = strdup(entry.mnt_dir))) {
endmntent(mounts);
free_array(list);
errno = ENOMEM;
return NULL;
}
list[used] = NULL; /* keep the list NULL-terminated, so free_array() is safe on the error paths above */
}
if (ferror(mounts) || !feof(mounts)) {
endmntent(mounts);
free_array(list);
errno = EIO;
return NULL;
} else
endmntent(mounts);
if (!used) {
free_array(list);
errno = 0;
return NULL;
}
if (size != used + 1) {
size = used + 1;
temp = realloc(list, size * sizeof list[0]);
if (!temp) {
free_array(list);
errno = ENOMEM;
return NULL;
}
list = temp;
}
list[used] = NULL;
errno = 0;
return list;
}
static int statistics(const char **mountpoint, uint64_t *bytes_total, uint64_t *bytes_free)
{
struct statvfs info;
uint64_t btotal = 0;
uint64_t bfree = 0;
size_t i;
if (!mountpoint)
return errno = EINVAL;
for (i = 0; mountpoint[i] != NULL; i++)
if (statvfs(mountpoint[i], &info) != -1) {
btotal += (uint64_t)info.f_frsize * (uint64_t)info.f_blocks;
bfree += (uint64_t)info.f_frsize * (uint64_t)info.f_bavail; /* f_bavail is in units of f_frsize, like f_blocks */
} else
return errno;
if (bytes_total)
*bytes_total = btotal;
if (bytes_free)
*bytes_free = bfree;
return 0;
}
int main(int argc, char *argv[])
{
uint64_t total = 0;
uint64_t nfree = 0;
if (argc > 1) {
if (statistics((const char **)argv + 1, &total, &nfree)) {
fprintf(stderr, "%s.\n", strerror(errno));
return EXIT_FAILURE;
}
} else {
char **mounts = normal_mounts();
size_t i;
if (!mounts) {
if (errno)
fprintf(stderr, "Error determining file systems: %s.\n", strerror(errno));
else
fprintf(stderr, "No normal file systems found.\n");
return EXIT_FAILURE;
}
fprintf(stderr, "Considering mount points");
for (i = 0; mounts[i] != NULL; i++)
fprintf(stderr, " %s", mounts[i]);
fprintf(stderr, "\n");
if (statistics((const char **)mounts, &total, &nfree)) {
fprintf(stderr, "%s.\n", strerror(errno));
return EXIT_FAILURE;
}
free_array(mounts);
}
printf("%20" PRIu64 " bytes total\n", total);
printf("%20" PRIu64 " bytes free\n", nfree);
return EXIT_SUCCESS;
}
The statistics() function takes a NULL-terminated array of mount points and two pointers to unsigned 64-bit integers. The function returns 0 if successful, and a nonzero errno code otherwise. If successful, it stores the total number of bytes in the file systems in the first integer, and the number of free bytes in the second.
If you supply one or more mount points as command-line arguments, only those are considered. (POSIX says argv[argc] == NULL, so this usage is safe.)
Otherwise, the normal_mounts() function is used to parse /proc/mounts to obtain a list of "normal" mount points. The function uses getmntent_r() to read each entry (line) from the kernel-provided pseudo-file. All tmpfs (ramdisk) and swap filesystems are excluded, as are those mounted at or under /proc, /boot, /sys, /run, /dev, /mnt, /media, and /var/run. This is just a crude heuristic, not a known-good approach.
In a daemon, or even in a graphical application, you call only (your equivalent of) the statistics() function, with the same array of mount points. You could even consider tracking each mount point separately, and let the user filter and combine the information they are interested in. In fact, I would recommend that: I personally might be interested in seeing the fluctuations in my temporary file usage (on machines where /tmp and /var/tmp are tmpfs mounts), as well as track my long-term usage of /home.
In a daemon, you can use the HUP, USR1, or USR2 signals to indicate when the user wants you to reload the configuration -- the mount point list, here. I do not believe it would be that interesting to integrate with D-Bus for detecting removable media mounts/unmounts, but of course you can if you think it useful.
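As a minimal sketch of that signal-driven reload (this is not part of info.c; the handler name and the loop body are placeholders), the handler only sets a flag, and the daemon's main loop acts on it between polls:
#define _POSIX_C_SOURCE 200809L
#include <signal.h>
#include <string.h>
#include <unistd.h>

static volatile sig_atomic_t reload_requested = 0;

static void handle_sighup(int signum)
{
    (void)signum;
    reload_requested = 1;   /* only set a flag: that is async-signal-safe */
}

int main(void)
{
    struct sigaction act;
    memset(&act, 0, sizeof act);
    act.sa_handler = handle_sighup;
    sigemptyset(&act.sa_mask);
    sigaction(SIGHUP, &act, NULL);

    while (1) {
        if (reload_requested) {
            reload_requested = 0;
            /* re-read the configured mount point list here */
        }
        /* call (your equivalent of) statistics() here, then sleep until the next sample */
        sleep(5);
    }
}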
If you compile the above program using e.g.
gcc -Wall -O2 info.c -o info
and run
./info
it will output something like
Considering mount points / /home
119989497856 bytes total
26786156544 bytes free
where the first line goes to standard error, and the byte counts go to standard output. You can also name the mount points explicitly -- make sure they are distinct, as the code does not check for duplicate mounts:
./info /home /tmp
If you are wondering how you could determine whether two directories are on the same mount or not: call stat(path1, &info1) on one, and stat(path2, &info2) on the other. If and only if (info1.st_dev == info2.st_dev), the two paths are on the same mount. (One device may be mounted multiple times at different points, using e.g. bind mounts, but usually the above check suffices.)
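As a sketch, that check wrapped in a helper (same_mount() is just an illustrative name, not part of the program above):
#include <sys/stat.h>

/* Returns 1 if path1 and path2 are on the same mount, 0 if not,
   and -1 if either stat() call fails. */
static int same_mount(const char *path1, const char *path2)
{
    struct stat info1, info2;
    if (stat(path1, &info1) == -1 || stat(path2, &info2) == -1)
        return -1;
    return info1.st_dev == info2.st_dev;
}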
If you find all the above code annoying, you can always rely on the df utility. To ensure the output is in the C/POSIX locale (and not, say, in French or Finnish), use
handle = popen("LANG=C LC_ALL=C df -Pl", "r");
or similar, and read the output using len = getline(&line, &size, handle).
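A minimal sketch of that fallback (assuming a df that supports -P and -l; the parsing is deliberately simple and will not cope with mount points containing spaces):
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

int main(void)
{
    FILE *handle = popen("LANG=C LC_ALL=C df -Pl", "r");
    char *line = NULL;
    size_t size = 0;
    ssize_t len;

    if (!handle)
        return EXIT_FAILURE;

    while ((len = getline(&line, &size, handle)) != -1) {
        char fsname[256], mount[256];
        unsigned long long blocks, used, avail;
        /* POSIX df -P prints: name, blocks, used, available, capacity, mount point.
           The header line does not match this format, so sscanf() skips it. */
        if (sscanf(line, "%255s %llu %llu %llu %*s %255s",
                   fsname, &blocks, &used, &avail, mount) == 5)
            printf("%s on %s: %llu blocks used, %llu blocks available\n",
                   fsname, mount, used, avail);
    }
    free(line);
    pclose(handle);
    return EXIT_SUCCESS;
}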

You can use popen() instead of system()/fopen(): it gives you a readable stream of the command's output without going through a file on the hard drive.

There's no portable ANSI C mechanism except the system-and-file kludge, and even that is a bit of an illusion, as it depends on df being present. However, the POSIX function popen() does essentially the same thing, but gives you the output as a FILE *.

#include <stdio.h>
#include <stdlib.h>
#include <sys/statvfs.h>
int main(void)
{
    struct statvfs stat;
    if (statvfs("/media/hp", &stat) != 0) {
        perror("statvfs");
        return 1;
    }
    /* f_bavail and f_blocks are counts of fragments of f_frsize bytes each */
    printf("avail size -- %llu GB\n", (unsigned long long)stat.f_frsize * stat.f_bavail / 1000000000ULL);
    printf("total size -- %llu GB\n", (unsigned long long)stat.f_frsize * stat.f_blocks / 1000000000ULL);
    return 0;
}
I finally did it with statvfs itself; it works fine.

Related

How to use scandir() in C to list sorted subdirectories recursively

I'm implementing parts of the Linux ls command in C. I want to sort the contents of directories lexicographically, which I've been doing using scandir(). This is easy enough for listing single directories, but I'm having trouble doing it for listing subdirectories recursively. My current code (it results in a segmentation fault once a directory entry is reached):
void recursive(char* arg){
    int i;
    struct dirent **file_list;
    int num;
    char* next_dir;

    num = scandir(arg, &file_list, NULL, alphasort);
    for(i = 0; i < num; i++) {
        if(file_list[i]->d_type == DT_DIR) {
            if(strcmp(".", file_list[i]->d_name) != 0 && strcmp("..", file_list[i]->d_name) != 0) {
                // Directories are printed with a colon to distinguish them from files
                printf("%s: \n", file_list[i]->d_name);
                strcpy(next_dir, arg);
                strcat(next_dir, "/");
                strcat(next_dir, file_list[i]->d_name);
                printf("\n");
                recursive(next_dir);
            }
        } else {
            if(strcmp(".", file_list[i]->d_name) != 0 && strcmp("..", file_list[i]->d_name) != 0) {
                printf("%s \n", file_list[i]->d_name);
            }
        }
    }
}

int main(void) {
    recursive(".");
    return 0;
}
There are two recommended methods for traversing entire filesystem trees in Linux and other POSIXy systems:
nftw(): man 3 nftw
Given an initial path, a callback function, the maximum number of descriptors to use, and a set of flags, nftw() will call the callback function once for every filesystem object in the subtree. The order in which entries in the same directory are visited is, however, not specified.
This is the POSIX.1 (IEEE 1003) function.
fts_open()/fts_read()/fts_children()/fts_close(): man 3 fts
The fts interface provides a way to traverse filesystem hierarchies. fts_children() provides a linked list of filesystem entries, sorted by the comparison function specified in the fts_open() call. It is rather similar to how scandir() returns an array of filesystem entries, except that the two use very different structures to describe each entry.
Prior to glibc 2.23 (released in 2016), the Linux (glibc) fts implementation had bugs when using 64-bit file sizes (so on x86-64, or when compiling with -D_FILE_OFFSET_BITS=64).
These are BSD functions (FreeBSD/OpenBSD/macOS), but are available in Linux also.
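A minimal sketch of the fts interface (separate from the nftw() example below): walk one or more trees, visiting each directory's entries in strcmp() order:
#include <fts.h>
#include <string.h>
#include <stdio.h>

static int by_name(const FTSENT **a, const FTSENT **b)
{
    return strcmp((*a)->fts_name, (*b)->fts_name);
}

int main(int argc, char *argv[])
{
    char *default_path[] = { ".", NULL };
    char **paths = (argc > 1) ? argv + 1 : default_path;
    FTS *tree = fts_open(paths, FTS_PHYSICAL | FTS_NOCHDIR, by_name);
    if (!tree)
        return 1;
    for (FTSENT *ent = fts_read(tree); ent != NULL; ent = fts_read(tree)) {
        if (ent->fts_info == FTS_D)        /* directory, visited in pre-order */
            printf("%s:\n", ent->fts_path);
        else if (ent->fts_info == FTS_F)   /* regular file */
            printf("  %s\n", ent->fts_name);
    }
    fts_close(tree);
    return 0;
}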
Finally, there is also the atfile version of scandir(), scandirat(), that returns the filtered and sorted filesystem entries from a specific directory, but in addition to the pathname, it takes a file descriptor to the relative root directory to be used as a parameter. (If AT_FDCWD is used instead of a file descriptor, then scandirat() behaves like scandir().)
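And a minimal sketch of scandirat() (a glibc extension, so it needs _GNU_SOURCE; here the directory to list is opened explicitly instead of passing AT_FDCWD):
#define _GNU_SOURCE
#include <dirent.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    int dirfd = open("/tmp", O_RDONLY | O_DIRECTORY);
    struct dirent **names;
    int n;

    if (dirfd == -1)
        return EXIT_FAILURE;
    n = scandirat(dirfd, ".", &names, NULL, alphasort);
    if (n == -1)
        return EXIT_FAILURE;
    for (int i = 0; i < n; i++) {
        puts(names[i]->d_name);
        free(names[i]);
    }
    free(names);
    close(dirfd);
    return EXIT_SUCCESS;
}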
The simplest option here is to use nftw(), store all walked paths, and finally sort the paths. For example, walk.c:
// SPDX-License-Identifier: CC0-1.0
#define _POSIX_C_SOURCE 200809L
#define _GNU_SOURCE
#include <stdlib.h>
#include <locale.h>
#include <ftw.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
struct entry {
/* Insert additional properties like 'off_t size' here. */
char *name; /* Always points to name part of pathname */
char pathname[]; /* Full path and name */
};
struct listing {
size_t max; /* Number of entries allocated for */
size_t num; /* Number of entries in the array */
struct entry **ent; /* Array of pointers, one per entry */
};
#define STRUCT_LISTING_INITIALIZER { 0, 0, NULL }
/* Locale-aware sort for arrays of struct entry pointers.
*/
static int entrysort(const void *ptr1, const void *ptr2)
{
const struct entry *ent1 = *(const struct entry **)ptr1;
const struct entry *ent2 = *(const struct entry **)ptr2;
return strcoll(ent1->pathname, ent2->pathname);
}
/* Global variable used by nftw_add() to add to the listing */
static struct listing *nftw_listing = NULL;
static int nftw_add(const char *pathname, const struct stat *info, int typeflag, struct FTW *ftwbuf)
{
const char *name = pathname + ftwbuf->base;
/* These generate no code, just silences the warnings about unused parameters. */
(void)info;
(void)typeflag;
/* Ignore "." and "..". */
if (name[0] == '.' && !name[1])
return 0;
if (name[0] == '.' && name[1] == '.' && !name[2])
return 0;
/* Make sure there is room for at least one more entry in the listing. */
if (nftw_listing->num >= nftw_listing->max) {
const size_t new_max = nftw_listing->num + 1000;
struct entry **new_ent;
new_ent = realloc(nftw_listing->ent, new_max * sizeof (struct entry *));
if (!new_ent)
return -ENOMEM;
nftw_listing->max = new_max;
nftw_listing->ent = new_ent;
}
const size_t pathnamelen = strlen(pathname);
struct entry *ent;
/* Allocate memory for this entry.
Remember to account for the name, and the end-of-string terminator, '\0', at end of name. */
ent = malloc(sizeof (struct entry) + pathnamelen + 1);
if (!ent)
return -ENOMEM;
/* Copy other filesystem entry properties to ent here; say 'ent->size = info->st_size;'. */
/* Copy pathname, including the end-of-string terminator, '\0'. */
memcpy(ent->pathname, pathname, pathnamelen + 1);
/* The name pointer is always to within the pathname. */
ent->name = ent->pathname + ftwbuf->base;
/* Append. */
nftw_listing->ent[nftw_listing->num++] = ent;
return 0;
}
/* Scan directory tree starting at path, adding the entries to struct listing.
Note: the listing must already have been properly initialized!
Returns 0 if success, nonzero if error; -1 if errno is set to indicate error.
*/
int scan_tree_sorted(struct listing *list, const char *path)
{
if (!list) {
errno = EINVAL;
return -1;
}
if (!path || !*path) {
errno = ENOENT;
return -1;
}
nftw_listing = list;
int result = nftw(path, nftw_add, 64, FTW_DEPTH);
nftw_listing = NULL;
if (result == -1)
return -1; /* nftw() itself failed and has already set errno */
if (result < 0) {
errno = -result;
return -1;
} else
if (result > 0) {
errno = 0;
return result;
}
if (list->num > 1)
qsort(list->ent, list->num, sizeof list->ent[0], entrysort);
return 0;
}
int main(int argc, char *argv[])
{
struct listing list = STRUCT_LISTING_INITIALIZER;
setlocale(LC_ALL, "");
if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
const char *arg0 = (argc > 0 && argv && argv[0] && argv[0][0]) ? argv[0] : "(this)";
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", arg0);
fprintf(stderr, " %s .\n", arg0);
fprintf(stderr, " %s TREE [ TREE ... ]\n", arg0);
fprintf(stderr, "\n");
fprintf(stderr, "This program lists all files and directories starting at TREE,\n");
fprintf(stderr, "in sorted order.\n");
fprintf(stderr, "\n");
return EXIT_SUCCESS;
}
for (int arg = 1; arg < argc; arg++) {
if (scan_tree_sorted(&list, argv[arg])) {
fprintf(stderr, "%s: Error scanning directory tree: %s.\n", argv[arg], strerror(errno));
return EXIT_FAILURE;
}
}
printf("Found %zu entries:\n", list.num);
for (size_t i = 0; i < list.num; i++)
printf("\t%s\t(%s)\n", list.ent[i]->pathname, list.ent[i]->name);
return EXIT_SUCCESS;
}
Compile using gcc -Wall -Wextra -O2 walk.c -o walk, and run using e.g. ./walk ...
The scan_tree_sorted() function calls nftw() for the directory specified, updating the global variable nftw_listing so that the nftw_add() callback function can add each new directory entry to it. If the listing contains more than one entry afterwards, it is sorted using qsort() and a locale-aware comparison function (based on strcoll()).
nftw_add() skips . and .., and adds every other pathname to the listing structure nftw_listing. It automatically grows the array as needed in linear fashion; the new_max = nftw_listing->num + 1000; means we allocate in units of a thousand (pointers).
The scan_tree_sorted() can be called multiple times with the same listing as the target, if one wants to list disjoint subtrees in one listing. Note, however, that it does not check for duplicates, although those could easily be filtered out after the qsort.
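For instance, a sketch of such a filter, run right after the qsort() and using the struct listing from walk.c above (this helper is mine, not part of the original program):
static void remove_duplicates(struct listing *list)
{
    size_t out = 0;
    for (size_t in = 0; in < list->num; in++) {
        if (out > 0 && !strcmp(list->ent[in]->pathname, list->ent[out - 1]->pathname))
            free(list->ent[in]);                /* duplicate of the previous kept entry: drop it */
        else
            list->ent[out++] = list->ent[in];   /* keep it, compacting the array */
    }
    list->num = out;
}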

C program to count specific words in FILE

The objective of the program is to rate a person's resume. The program should open and read two .txt FILEs: one contains the keywords and the other is the resume itself. The process consists of looping through keywords.txt and then trying to find a matching word in resume.txt. I got it almost working, but the program seems to treat the first space as the end of the file in the keywords FILE.
This is what I have (I tried switching the first word in the keywords file and the count seems to work for that one; it would be good to scan only characters without symbols, and it is necessary to count the occurrences of every single keyword):
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
int main(){
    FILE* txtKey;
    FILE* txtResume;
    char keyWords[1000];
    char word[10000];
    int count;

    txtKey=fopen("keywords.txt", "r");
    if(txtKey == NULL){
        printf("Failed to open txtKey file \n");
        return 1;
    }
    txtResume=fopen("resume.txt", "r");
    if(txtResume == NULL){
        printf("Failed to open txtResume file \n");
        return 1;
    }
    while (fscanf(txtKey, "%s", keyWords) != EOF)
    {
        while (fscanf(txtResume, "%s", word) != EOF)
        {
            if (strstr(word, keyWords) != NULL)
            {
                count++;
            }
        }
    }
    printf("The keywords were found %d times in your resume!", count);
    fclose(txtResume);
    fclose(txtKey);
    return 0;
}//END MAIN
Note: This is prefaced by my top comments.
I've created a word list struct that holds a list of words. It is used twice: once to store the list of keywords, and a second time to parse the current line of the resume file.
I coded it from scratch, because it's somewhat different than what you had:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifdef DEBUG
#define dbgprt(_fmt...) \
do { \
printf(_fmt); \
} while (0)
#else
#define dbgprt(_fmt...) \
do { \
} while (0)
#endif
typedef struct {
int list_max;
int list_cnt;
char **list_words;
} list_t;
list_t keywords;
list_t linewords;
char buf[10000];
int
wordsplit(FILE *xf,list_t *list,int storeflg)
{
char *cp;
char *bp;
int valid;
if (! storeflg)
list->list_cnt = 0;
do {
cp = fgets(buf,sizeof(buf),xf);
valid = (cp != NULL);
if (! valid)
break;
bp = buf;
while (1) {
cp = strtok(bp," \t\n");
bp = NULL;
if (cp == NULL)
break;
// grow the list
if (list->list_cnt >= list->list_max) {
list->list_max += 100;
list->list_words = realloc(list->list_words,
sizeof(char *) * (list->list_max + 1));
}
if (storeflg)
cp = strdup(cp);
list->list_words[list->list_cnt++] = cp;
list->list_words[list->list_cnt] = NULL;
}
} while (0);
return valid;
}
void
listdump(list_t *list,const char *tag)
{
char **cur;
dbgprt("DUMP: %s",tag);
for (cur = list->list_words; *cur != NULL; ++cur) {
dbgprt(" '%s'",*cur);
}
dbgprt("\n");
}
int
main(void)
{
FILE *xf;
int count;
xf = fopen("keywords.txt","r");
if (xf == NULL)
return 1;
while (1) {
if (! wordsplit(xf,&keywords,1))
break;
}
fclose(xf);
listdump(&keywords,"KEY");
count = 0;
xf = fopen("resume.txt","r");
if (xf == NULL)
return 2;
while (1) {
if (! wordsplit(xf,&linewords,0))
break;
listdump(&linewords,"CUR");
for (char **str = linewords.list_words; *str != NULL; ++str) {
dbgprt("TRYCUR: '%s'\n",*str);
for (char **key = keywords.list_words; *key != NULL; ++key) {
dbgprt("TRYKEY: '%s'\n",*key);
if (strcmp(*str,*key) == 0) {
count += 1;
break;
}
}
}
}
fclose(xf);
printf("keywords found %d times\n",count);
return 0;
}
UPDATE:
Any option to make it simpler? I don't think I know all the concepts of this answer, although this result is perfect.
Yes, based on your code, I realized that what I did was a bit advanced. But, by reusing the list as I did, it actually saved a bit of replicated code (e.g. why have separate parsing code for the keywords and the resume data when they are both very similar?).
There's standard documentation for all the libc functions (e.g. fgets, strtok, strcmp).
If you know the [maximum] number of keywords beforehand [this is possible to do], you could use a fixed-size char ** array [similar to what you had].
Or, you could just do a realloc on a char **keywords array on every new keyword (e.g. cp). And, maintain a separate count variable (e.g. int keycnt). This would be fine if we only needed one list (i.e. we could forego the list_t struct).
We could replicate some of the keyword code for the second loop in main, and again, use different variables for the array and its count.
But, this is wasteful. list_t is an example of using realloc efficiently (i.e. calling it less often). This is a standard technique.
If you do a websearch on dynamic resize array realloc, one of the entries you'll find is: https://newton.ex.ac.uk/teaching/resources/jmr/appendix-growable.html
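A minimal sketch of that pattern (the names here are illustrative, not from the program above): grow a char ** array with realloc(), keep a separate count, and strdup() each word so it outlives the read buffer:
#include <stdlib.h>
#include <string.h>

static char **words = NULL;
static size_t words_used = 0, words_alloc = 0;

static int add_word(const char *w)
{
    if (words_used >= words_alloc) {
        size_t new_alloc = words_alloc ? 2 * words_alloc : 64;   /* grow in big steps, not one by one */
        char **tmp = realloc(words, new_alloc * sizeof *words);
        if (!tmp)
            return -1;          /* the old array is still valid on failure */
        words = tmp;
        words_alloc = new_alloc;
    }
    words[words_used] = strdup(w);
    if (!words[words_used])
        return -1;
    words_used++;
    return 0;
}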
Note the use of strdup to preserve the word values for the keyword list beyond the next call to fgets.
Hopefully, that covers enough so you can study it a bit. The whole "how to implement a dynamic resizing array using realloc?" topic shows up quite frequently as a question on SO, so you could also search here for a question on it.
Also, how would it work if the keywords.txt list has words separated by ","?
To parse by ",", just change the second arg to strtok to include it (e.g. " \t,\n"). That will work for abc def, abc,def, or abc, def.

Multithreaded reading/doing things with chars from character array in C

I am trying to read a character array that contains the contents of many large files. The character array is going to be quite large, because the files are large, so I want to do it using multithreading (pthread). I want the user to be able to designate how many threads they want to run. I have something working, but increasing the number of threads does nothing to affect performance (i.e. 1 thread finishes just as fast as 10). In fact, it seems to be just the opposite: telling the program to use 10 threads runs much slower than telling it to use 1.
Here is the method for slicing up the character array according to the number of threads the user passes to the program. I know this is wrong, I could use some advice here.
//Universal variables
int numThreads;
size_t sizeOfAllFiles; // Size, in bytes, of allFiles
char* allFiles; // Where all of the files are stored, together
void *zip(void *nthread);
void *zip(void *nThread) {
    int currentThread = *(int*)nThread;
    int remainder = sizeOfAllFiles % currentThread;
    int slice = (sizeOfAllFiles-remainder) / currentThread;
    // I subtracted the remainder for my testing
    // because I didn't want to worry about whether
    // the char array's size is evenly divisible by numThreads
    int i = (slice * (currentThread-1));
    char currentChar = allFiles[i]; //Used for iterating

    while(i<(slice * currentThread) && i>=(slice * (currentThread-1))) {
        i++;
        // Do things with the respective thread's
        // 'slice' of the array.
        .....
    }
    return 0;
}
And here is how I am spawning the threads, which I am almost positive that I am doing correctly:
for (int j = 1; j <= threadNum; j++) {
    k = malloc(sizeof(int));
    *k = j;
    if (pthread_create (&thread[j], NULL, zip, k) != 0) {
        printf("Error\n");
        free(thread);
        exit(EXIT_FAILURE);
    }
}

for (int i = 1; i <= threadNum; i++)
    pthread_join (thread[i], NULL);
This is all really confusing for me so if I could get some help on this, I'd greatly appreciate it. I specifically am struggling with the slicing part (cutting it up correctly), and with not seeing performance gains by using more than one thread. Thanks in advance.
I'm starting by throwing a test program at you:
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#include <time.h>
bool
EnlargeBuffer(char ** const buffer_pointer,
size_t * const buffer_size)
{
char * larger_buffer = realloc(*buffer_pointer,
2 * *buffer_size);
if (! larger_buffer) {
larger_buffer = realloc(*buffer_pointer,
*buffer_size + 100);
if (! larger_buffer) {
return false;
}
*buffer_size += 100;
} else {
*buffer_size *= 2;
}
*buffer_pointer = larger_buffer;
printf("(Buffer size now at %zu)\n", *buffer_size);
return true;
}
bool
ReadAll(FILE * const source,
char ** pbuffer,
size_t * pbuffer_size,
size_t * pwrite_index)
{
int c;
while ((c = fgetc(source)) != EOF) {
assert(*pwrite_index < *pbuffer_size);
(*pbuffer)[(*pwrite_index)++] = c;
if (*pwrite_index == *pbuffer_size) {
if (! EnlargeBuffer(pbuffer, pbuffer_size)) {
free(*pbuffer);
return false;
}
}
}
if (ferror(source)) {
free(*pbuffer);
return false;
}
return true;
}
unsigned
CountAs(char const * const buffer,
size_t size)
{
unsigned count = 0;
while (size--)
{
if (buffer[size] == 'A') ++count;
}
return count;
}
int
main(int argc, char ** argv)
{
char * buffer = malloc(100);
if (! buffer) return 1;
size_t buffer_size = 100;
size_t write_index = 0;
clock_t begin = clock();
for (int i = 1; i < argc; ++i)
{
printf("Reading %s now ... \n", argv[i]);
FILE * const file = fopen(argv[i], "r");
if (! file) return 1;
if (! ReadAll(file, &buffer, &buffer_size, &write_index))
{
return 1;
}
fclose(file);
}
clock_t end = clock();
printf("Reading done, took %f seconds\n",
(double)(end - begin) / CLOCKS_PER_SEC);
begin = clock();
unsigned const as = CountAs(buffer, write_index);
end = clock();
printf("All files have %u 'A's, counting took %f seconds\n",
as,
(double)(end - begin) / CLOCKS_PER_SEC);
}
This program reads all files (passed as command line arguments) into one big large char * buffer, and then counts all bytes which are == 'A'. It also times both of these steps.
Example run with (shortened) output on my system:
# gcc -Wall -Wextra -std=c11 -pedantic allthefiles.c
# dd if=/dev/zero of=large_file bs=1M count=1000
# ./a.out allthefiles.c large_file
Reading allthefiles.c now ...
(Buffer size now at 200)
...
(Buffer size now at 3200)
Reading large_file now ...
(Buffer size now at 6400)
(Buffer size now at 12800)
...
(Buffer size now at 1677721600)
Reading done, took 4.828559 seconds
All files have 7 'A's, counting took 0.764503 seconds
Reading took almost 5 seconds, but counting (= iterating once, in a single thread, over all bytes) took a bit less than 1 second.
You're optimizing at the wrong place!
Using 1 thread to read all files, and then using N threads to operate on that one buffer, isn't going to get you anywhere. The fastest way to read 1 file is to use 1 thread. For multiple files, use 1 thread per file!
So, in order to achieve the speedup that you need to show for your assignment:
Create a pool of threads with variable size.
Have a pool of tasks, where each task consists of
read one file
compute its run-length encoding
store the run-length encoded file
let the threads take tasks from your task pool.
Things to consider: how do you combine the results of each task, without requiring (costly) synchronization?
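A minimal sketch of that design using pthreads (the file list comes from the command line, and the per-file work is a placeholder rather than a real run-length encoder):
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct pool {
    char **files;           /* task list: one filename per task */
    int nfiles;
    int next;               /* index of the next unclaimed task */
    pthread_mutex_t lock;
};

static void process_file(const char *path)
{
    /* read the file and, e.g., run-length encode it -- placeholder */
    printf("processing %s\n", path);
}

static void *worker(void *arg)
{
    struct pool *p = arg;
    for (;;) {
        pthread_mutex_lock(&p->lock);
        int i = (p->next < p->nfiles) ? p->next++ : -1;   /* claim one task */
        pthread_mutex_unlock(&p->lock);
        if (i < 0)
            break;                                        /* no tasks left */
        process_file(p->files[i]);                        /* result stays local to the task */
    }
    return NULL;
}

int main(int argc, char *argv[])
{
    int nthreads = 4;                                     /* would come from the user */
    struct pool p;
    pthread_t tid[nthreads];

    p.files = argv + 1;
    p.nfiles = argc - 1;
    p.next = 0;
    pthread_mutex_init(&p.lock, NULL);

    for (int t = 0; t < nthreads; t++)
        pthread_create(&tid[t], NULL, worker, &p);
    for (int t = 0; t < nthreads; t++)
        pthread_join(tid[t], NULL);
    return 0;
}
Compile and link with -pthread. Because each task owns its own input file and its own output, the workers only need the mutex to claim the next task index.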

Pointer being freed was not allocated, Abort trap: 6

I'm not proficient in C programming, so please excuse me if this isn't a strong question. In the following code, I can only allocate memory to samplesVec after obtaining the value of nsamplepts, but I need to return the vector samplesVec to main for further use (not yet coded). However, I'm getting the following error:
Error in Terminal Window:
ImportSweeps(3497,0x7fff7b129310) malloc: *** error for object 0x7fdaa0c03af8: pointer being freed was not allocated
*** set a breakpoint in malloc_error_break to debug
Abort trap: 6
I'm using Mac OS X Mavericks with the gcc compiler. Thanks for any help.
EDITED!!! After valuable inputs from commenters, the following represents a solution to the original problem (which is no longer available).
The following code modification seemed to solve my original questions. Thanks for the valuable inputs everyone!
/* Header Files */
#define LIBAIFF_NOCOMPAT 1 // do not use LibAiff 2 API compatibility
#include <libaiff/libaiff.h>
#include <unistd.h>
#include <stdio.h>
#include <dirent.h>
#include <string.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <math.h>
/* Function Declarations */
void FileSearch(char*, char*, char*, char*, char*);
int32_t *ImportSweeps(char*);
/* Main */
int main()
{
char flag1[2] = "N";
char binname[20] = "bin1"; // dummy assignment
char buildfilename[40] = "SweepR";
char skeletonpath[100] = "/Users/.../Folder name/";
int k, len;
/* Find the sweep to be imported in the directory given by filepath */
FileSearch(skeletonpath, binname, buildfilename, skeletonpath, flag1);
if (strcmp(flag1,"Y")) {
printf("No file found. End of program.\n");
} else {
len = (int) strlen(skeletonpath);
char *filepath = malloc(len + 1); /* room for the '\0' terminator */
for (k = 0; k <= len; k++) {
filepath[k] = skeletonpath[k]; /* also copies the terminating '\0' */
}
printf("File found! Filepath: %s\n", filepath);
// Proceed to import sweep
int32_t *sweepRfile = ImportSweeps(filepath);
if (sweepRfile) {
printf("Success!\n");
// Do other things with sweepRfile
free(sweepRfile);
}
free(filepath);
}
return 0;
}
/* Sub-Routines */
void FileSearch(char *dir, char *binname, char *buildfilename, char* filepath, char* flag1)
{
DIR *dp;
struct dirent *entry;
struct stat statbuf;
if((dp = opendir(dir)) == NULL) {
fprintf(stderr,"Cannot open directory: %s\n", dir);
return;
}
chdir(dir);
while((entry = readdir(dp)) != NULL) {
lstat(entry->d_name, &statbuf);
if(S_ISDIR(statbuf.st_mode)) {
/* Found a directory, but ignore . and .. */
if(strcmp(".",entry->d_name) == 0 || strcmp("..",entry->d_name) == 0)
continue;
strcpy(binname,entry->d_name);
strcpy(buildfilename,"SweepR");
/* Recurse at a new indent level */
FileSearch(entry->d_name, binname, buildfilename, filepath, flag1);
}
else {
strcat(buildfilename, binname);
strcat(buildfilename, ".aiff"); /* sprintf() with the destination also among the sources is undefined behaviour */
if (strcmp(entry->d_name,buildfilename)) {
strcpy(buildfilename,"SweepR");
} else {
strcat(filepath, binname);
strcat(filepath, "/");
strcat(filepath, buildfilename);
strcpy(flag1,"Y");
break;
}
}
}
chdir("..");
closedir(dp);
}
int32_t *ImportSweeps(char *filepath)
{
char *filepathread = filepath;
/* Initialize files for importing */
AIFF_Ref fileref;
/* Intialize files for getting information about AIFF file */
uint64_t nSamples;
int32_t *samples = NULL;
int32_t *samplesVec = NULL;
int channels, bitsPerSample, segmentSize, ghost, nsamplepts;
double samplingRate;
/* Import Routine */
fileref = AIFF_OpenFile(filepathread, F_RDONLY) ;
if(fileref)
{
// File opened successfully. Proceed.
ghost = AIFF_GetAudioFormat(fileref, &nSamples, &channels, &samplingRate, &bitsPerSample, &segmentSize);
if (ghost < 1)
{
printf("Error getting audio format.\n");
AIFF_CloseFile(fileref); return NULL;
}
nsamplepts = ((int) nSamples)*channels;
samples = malloc(nsamplepts * sizeof(int32_t));
samplesVec = malloc(nsamplepts * sizeof(int32_t));
ghost = AIFF_ReadSamples32Bit(fileref, samples, nsamplepts);
if (ghost) {
for (int k = 0; k < nsamplepts; k++) {
samplesVec[k] = *(samples+k);
}
}
free(samples);
AIFF_CloseFile(fileref);
}
return samplesVec;
}
So... as far as I can see... :-)
samplesVec, the return value of ImportSweeps, is not initialized if fileref is false. Automatic (i.e. local) variables carry no guarantee about their value unless they are explicitly initialized -- in other words, samplesVec could hold any address. If samplesVec happens to be non-NULL by luck (which may often be the case), you try to free a chunk of memory that was never allocated, or, with very bad luck, one that was allocated somewhere else.
If my guess is correct, you can easily fix this with:
int32_t *samples;
int32_t *samplesVec = NULL;
It is a good idea anyway to initialize any variable as soon as possible with some meaningful error or dummy value if you do not use it in the very next line. As pointers are horrible beasts, I always NULL them if I don't initialize them with a useful value at declaration.
Edit: Several minor small changes for a readable approximation to English. :-)
If AIFF_OpenFile fails, ImportSweeps returns an undefined value because samplesVec wasn't initialized. If that value is non-NULL, main will try to free it. You can either initialize samplesVec = NULL, or you can reorganize the code as
fileref = AIFF_OpenFile(filepathread, F_RDONLY);
if (!fileref)
{
    // print error message here
    return NULL;
}
// File opened successfully. Proceed.
...
There are people who will insist that a function should have only one exit -- they are poorly informed and voicing a faulty dogma handed down from others who are likewise uninformed and dogmatic. The check-for-error-and-return above is known as a guard clause. The alternative style, of indenting every time a test succeeds, yields the arrow anti-pattern, which is harder to read, harder to modify, and more error prone. See http://blog.codinghorror.com/flattening-arrow-code/ and http://c2.com/cgi/wiki?ArrowAntiPattern for some discussion.

Read next line in Text file C Programming

I know about fscanf(), fgets() and the other functions for reading the next line of a text file. However, if your program is fed a text file via 'cat msg1.txt | ./anonymizer', would you use the same functions?
For my program the code for the main is:
int main (void)
{
    char input[1000]= {'\0'}; //the sentence the user will enter
    printf("Enter a sentence:");
    scanf("%[^\n]", input);

    char newSentence[1000]={'\0'};
    sentence=(char *) &newSentence;

    line=getText(input,0);
    divide(input);
    printf("%s\n",sentence);
    return 0;
}
In the command line I enter:
gcc -o anonymizer anonymizer.c
cat msg1.txt | ./anonymizer
My msg1 text file contains:
Hi, my email addresses are h.potter#hogwarts.edu and 1a#2b3c#lkj#
Although it's not an email addresses, I'd hate if# you saw my
secret#word. Gary.zenkel#nbcuni.comHoever, input variable only
contains the first line: 'Hi, my email addresses are
h.potter#hogwarts.edu and 1a#2b3c#lkj#'
How can I get the input variable to contain the other two lines?
Almost. scanf(...) is essentially equivalent to fscanf(stdin, ...); similarly for gets/fgets. You should be able to use either to read from your standard input stream.
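A minimal sketch of reading every line from standard input with fgets(); it behaves the same whether the input is typed interactively or piped in via 'cat msg1.txt | ./anonymizer':
#include <stdio.h>

int main(void)
{
    char line[1000];
    while (fgets(line, sizeof line, stdin) != NULL) {
        /* process one line here, e.g. anonymize it; this just echoes it */
        fputs(line, stdout);
    }
    return 0;
}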
To my limited knowledge (I could be wrong), the standard libc offers no efficient way to read a line when you do not know the maximum line length. You may get a buffer overflow with scanf() and gets(), because they do not check the length of your buffer. If you use fgets(), you may waste time on frequent strlen() and realloc() calls. If you use fgetc(), it will be slow, because reading one character at a time has a lot of overhead.
For efficient line reading, we have to keep some intermediate state. It is not that easy. I am attaching an implementation. It is quite complicated, but it is very efficient and generic. If you do not care about the details, you can just focus on the main() function to see how the routines are used.
To try this program:
gcc -Wall prog.c; ./a.out < input.txt > output.txt
Program:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
#define kstype_t FILE* // type of file handler
#define ksread_f(fp, buf, len) fread((buf), 1, (len), (fp)) // function to read a data chunk
typedef struct {
int l, m; // l: length of string; m: allocated size
char *s; // string
} kstring_t;
typedef struct {
kstype_t f; // file handler
int begin, end, is_eof, bufsize;
unsigned char *buf; // buffer
} kstream_t;
kstream_t *ks_open(kstype_t fp, int bufsize)
{
kstream_t *ks;
ks = (kstream_t*)calloc(1, sizeof(kstream_t));
ks->bufsize = bufsize;
ks->buf = (unsigned char*)malloc(bufsize);
ks->f = fp;
return ks;
}
void ks_close(kstream_t *ks)
{
free(ks->buf); free(ks);
}
int ks_readline(kstream_t *ks, int delimiter, kstring_t *str)
{
str->l = 0;
if (ks->begin >= ks->end && ks->is_eof) return -1;
for (;;) {
int i;
if (ks->begin >= ks->end) {
if (!ks->is_eof) {
ks->begin = 0;
ks->end = ksread_f(ks->f, ks->buf, ks->bufsize);
if (ks->end < ks->bufsize) ks->is_eof = 1;
if (ks->end == 0) break;
} else break;
}
for (i = ks->begin; i < ks->end; ++i)
if (ks->buf[i] == delimiter) break;
if (str->m - str->l < i - ks->begin + 1) {
str->m = str->l + (i - ks->begin) + 1;
kroundup32(str->m);
str->s = (char*)realloc(str->s, str->m);
}
memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin);
str->l = str->l + (i - ks->begin);
ks->begin = i + 1;
if (i < ks->end) break;
}
if (str->s == 0) {
str->m = 1;
str->s = (char*)calloc(1, 1);
}
str->s[str->l] = '\0';
return str->l;
}
int main()
{
kstream_t *ks;
kstring_t str;
str.l = str.m = 0; str.s = 0; // initialize the string struct
ks = ks_open(stdin, 4096); // initialize the file handler
while (ks_readline(ks, '\n', &str) >= 0) // read each line
puts(str.s); // print it out
ks_close(ks); free(str.s); // free
return 0;
}
