ZLIB inflateMark. What application uses this function? - zlib

I am looking for examples and software applications that use inflateMark.

Here's one:
/* gzindex -- build an index for a gzip file and then test it
* Copyright (C) 2009 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
* Version 1.0 20 December 2009 Mark Adler
*/
/* This code demonstrates the use of new capabilities in zlib 1.2.3.4 or later
to create and use a random access index. It is called with the name of a
gzip file on the command line. That file is then indexed and the index is
tested by accessing the blocks in reverse order. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include "zlib.h"
/* distance between random access entry points in the uncompressed data */
#define CHUNK 1024
/* file-local functions in this program are declared with "local" */
#define local static
/* data structure for each random access entry point; one of these is
recorded per CHUNK bytes of uncompressed output */
struct point {
off_t head; /* starting bit of coded header in input stream (could
replace with the actual header bits -- the average
dynamic block header length is about 80 bytes), or
-1 if not a coded entry point */
off_t start; /* starting bit of compressed data in input stream --
for a coded block, this may be the start of a code
that generates bytes before the entry point, and so
those bytes need to be decoded and discarded to get
to the entry point */
unsigned offset; /* offset of the entry point in output data for a coded
block (i.e. the output bytes to discard), or number
of bytes remaining in stored block, or zero for an
entry at the start of a block if head == -1 */
int last; /* true if this access point is in the last block --
needed for stored header construction */
};
/* maximum dynamic block header span is less than this -- this also provides
enough space to read six bytes in and produce 258 bytes out; it is the size
of the scratch buffer used by inflate_entry() */
#define MAXHEAD 289
/* set up the inflate state and the file pointer to decompress with the output
starting at entry, where strm is the state to set (assumed to be already
initialized for raw inflation), the deflate stream is in file gz starting
at offset, index is the list of index points, entry is the element of index,
and sofar is the uncompressed data from the beginning of the stream at least
as far as the entry point -- inflate_entry() will return Z_OK on success, or
zlib error code on failure, where Z_ERRNO is an error reading or seeking the
file gz

Three cases are handled, selected by the index entry:
- head == -1 and offset == 0: the entry is at the start of a deflate block
- head == -1 and offset != 0: the entry is inside a stored block
- head != -1: the entry is inside a coded block

On success strm's next_in/avail_in/next_out/avail_out are cleared, so the
caller must set them before calling inflate() */
local int inflate_entry(z_stream *strm, FILE *gz, off_t offset,
struct point *index, size_t entry,
unsigned char *sofar)
{
int ret;
unsigned len;
size_t edge;
struct point *point;
unsigned char buf[MAXHEAD];
/* prepare the inflate stream to start anew (assume it's set up for raw) */
ret = inflateReset(strm);
if (ret != Z_OK)
return ret;
/* set the dictionary history for decompression: the up to 32768 bytes of
uncompressed data that immediately precede the point where decoding
resumes */
point = index + entry;
edge = CHUNK * entry;
if (point->head != -1) /* back up for coded block */
edge -= point->offset;
len = edge < 32768U ? (unsigned)edge : 32768U;
ret = inflateSetDictionary(strm, sofar + edge - len, len);
if (ret != Z_OK)
return ret;
/* set up the inflate state and file pointer to start inflation at the
entry point */
if (point->head == -1)
if (point->offset == 0) {
/* entry point is the start of a deflate block (first block) */
ret = fseeko(gz, offset + (point->start >> 3), SEEK_SET);
if (ret)
return Z_ERRNO;
/* NOTE(review): fread() returns a size_t item count, so the ret < 0
test below looks unable to detect a read error; ferror(gz) would --
confirm (the same pattern recurs after every fread() here) */
ret = fread(buf, 1, 1, gz);
if (ret < 0)
return Z_ERRNO;
if (ret == 0)
return Z_BUF_ERROR;
/* hand inflate the bits of the first byte that belong to this entry:
start is a bit position, so the low (start & 7) bits of the byte are
dropped */
ret = inflatePrime(strm, 8 - ((int)(point->start) & 7),
buf[0] >> ((int)(point->start) & 7));
if (ret != Z_OK)
return ret;
}
else {
/* entry point is inside a stored block -- build a stored block
header to start off with the bytes left in that block at the
entry point */
buf[0] = (unsigned char)(point->last ? 1 : 0);
/* 16-bit length, low byte first, followed by its one's complement */
buf[1] = (unsigned char)(point->offset);
buf[2] = (unsigned char)(point->offset >> 8);
buf[3] = (unsigned char)(~(point->offset));
buf[4] = (unsigned char)(~(point->offset) >> 8);
strm->avail_in = 5;
strm->next_in = buf;
/* no output space is offered: only the header is run through */
strm->avail_out = 0;
strm->next_out = buf + 5;
ret = inflate(strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR);
if (ret == Z_DATA_ERROR || ret == Z_MEM_ERROR)
return ret;
/* position input file at next byte to read */
ret = fseeko(gz, offset + (point->start >> 3), SEEK_SET);
if (ret)
return Z_ERRNO;
}
else {
/* entry point is inside a coded block -- run the header through
inflate first */
ret = fseeko(gz, offset + (point->head >> 3), SEEK_SET);
if (ret)
return Z_ERRNO;
ret = fread(buf, 1, MAXHEAD, gz);
if (ret < 0)
return Z_ERRNO;
if (ret == 0)
return Z_BUF_ERROR;
/* the first byte is fed through inflatePrime() below, the rest as
ordinary input */
strm->avail_in = (unsigned)ret - 1;
strm->next_in = buf + 1;
ret = inflatePrime(strm, 8 - ((int)(point->head) & 7),
buf[0] >> ((int)(point->head) & 7));
if (ret != Z_OK)
return ret;
strm->avail_out = 0;
strm->next_out = buf + MAXHEAD;
ret = inflate(strm, Z_TREES);
assert(ret != Z_STREAM_ERROR);
if (ret == Z_DATA_ERROR || ret == Z_MEM_ERROR)
return ret;
/* bit 256 of data_type is required here; if it is not set the header
was not fully processed from the bytes read, which is reported as a
data error */
if ((strm->data_type & 256) != 256)
return Z_DATA_ERROR;
ret = inflatePrime(strm, -1, 0); /* discard remaining bits */
if (ret != Z_OK)
return ret;
/* set up to inflate, loading the initial 1..8 bits */
ret = fseeko(gz, offset + (point->start >> 3), SEEK_SET);
if (ret)
return Z_ERRNO;
ret = fread(buf, 1, 1, gz);
if (ret < 0)
return Z_ERRNO;
if (ret == 0)
return Z_BUF_ERROR;
ret = inflatePrime(strm, 8 - ((int)(point->start) & 7),
buf[0] >> ((int)(point->start) & 7));
if (ret != Z_OK)
return ret;
/* discard extra output bytes from this code, if any, to get to the
entry point (max length/distance pair is six bytes) -- move file
pointer back to first unused byte */
if (point->offset) {
ret = fread(buf, 1, 6, gz);
if (ret < 0)
return Z_ERRNO;
strm->avail_in = (unsigned)ret;
strm->next_in = buf;
/* exactly point->offset bytes are decoded into scratch space and
thrown away */
strm->avail_out = point->offset;
strm->next_out = buf + 6;
ret = inflate(strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR);
if (ret == Z_DATA_ERROR || ret == Z_MEM_ERROR)
return ret;
if (strm->avail_out != 0)
return Z_DATA_ERROR;
ret = fseeko(gz, -(long)(strm->avail_in), SEEK_CUR);
if (ret)
return Z_ERRNO;
}
}
/* return with strm and next byte from gz prepared to decompress starting
at the requested entry point (reset buffers to make sure the user sets
them) */
strm->avail_in = 0;
strm->next_in = NULL;
strm->avail_out = 0;
strm->next_out = NULL;
return Z_OK;
}
/* index a raw deflate stream from the file gz starting at offset, and return
an index with number entries -- the uncompressed data is also returned in
data[0..length-1] for use in dictionaries and for comparison

On success Z_OK is returned and *index (and *data, when both data and length
are non-NULL) are malloc()ed buffers owned by the caller. On failure a zlib
error code is returned, everything allocated here is freed, and the output
parameters are left untouched. Z_ERRNO reports a seek or read error on gz,
Z_DATA_ERROR also covers input that ends before the last block. */
local int inflate_index(FILE *gz, off_t offset, struct point **index,
size_t *number, unsigned char **data, size_t *length)
{
int ret, last;
size_t size, num, max, n;
off_t pos, head, here;
long left, back;
struct point *list, *list2;
unsigned char *out, *out2;
z_stream strm;
unsigned char in[16384];
/* position input file */
ret = fseeko(gz, offset, SEEK_SET);
if (ret)
return Z_ERRNO;
/* allocate output space to save the data, grow as needed later */
size = 131072L;
out = malloc(size);
if (out == NULL)
return Z_MEM_ERROR;
/* allocate space for index list -- grow as needed later */
max = 512;
list = malloc(max * sizeof(struct point));
if (list == NULL) {
free(out);
return Z_MEM_ERROR;
}
/* allocate inflate state for raw decoding */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit2(&strm, -15);
if (ret != Z_OK) {
free(list);
free(out);
return ret;
}
/* make first index entry to simply start decompressing at beginning */
list[0].head = -1;
list[0].start = 0;
list[0].offset = 0;
list[0].last = 0; /* doesn't matter since not inside stored */
num = 1;
/* inflate the input data, CHUNK output bytes at a time, saving enough
information to randomly access the input data */
/* pos counts the compressed bytes read from gz so far (relative to
offset); head is the bit position of the most recent block header */
pos = head = 0;
last = 0;
strm.next_out = out;
do {
/* if needed, allocate more output space */
if ((size_t)(strm.next_out - out) > size - CHUNK) {
size <<= 1;
/* size is unsigned, so "size <= 0" catches the doubling wrapping to
zero */
if (size <= 0 || (out2 = realloc(out, size)) == NULL) {
(void)inflateEnd(&strm);
free(list);
free(out);
return Z_MEM_ERROR;
}
/* realloc may have moved the buffer: rebase the output pointer */
strm.next_out = out2 + (strm.next_out - out);
out = out2;
}
/* decompress CHUNK more output bytes */
strm.avail_out = CHUNK;
/* for each output CHUNK, feed input data until there is no progress */
do {
/* if needed, get more input data */
if (strm.avail_in == 0) {
strm.avail_in = fread(in, 1, sizeof(in), gz);
if (ferror(gz)) {
(void)inflateEnd(&strm);
free(list);
free(out);
return Z_ERRNO;
}
/* if we get to EOF here, then Houston, we have a problem */
if (strm.avail_in == 0) {
(void)inflateEnd(&strm);
free(list);
free(out);
return Z_DATA_ERROR;
}
pos += strm.avail_in;
strm.next_in = in;
}
/* inflate available input data to fill block, but return early if
we get to a block boundary */
ret = inflate(&strm, Z_BLOCK);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
if (ret == Z_DATA_ERROR || ret == Z_MEM_ERROR) {
(void)inflateEnd(&strm);
free(list);
free(out);
return ret;
}
/* if at a block boundary, note the location of the header */
if (strm.data_type & 128) {
/* bit position = bytes consumed so far times eight, less the low
six bits of data_type (per zlib.h, the count of bits of the last
input byte not yet used -- confirm against the zlib manual) */
head = ((pos - strm.avail_in) << 3) - (strm.data_type & 63);
last = strm.data_type & 64; /* true at end of last block */
}
} while (strm.avail_out != 0 && !last);
/* if got to end of stream, no more entry points needed */
if (last)
break;
/* filled up a block and there's more -- make a new entry */
if (num == max) { /* make more room in list if needed */
max <<= 1;
n = max * sizeof(struct point);
/* the division undoes the multiplication to detect overflow of n */
if (n / max != sizeof(struct point) ||
(list2 = realloc(list, n)) == NULL) {
(void)inflateEnd(&strm);
free(list);
free(out);
return Z_MEM_ERROR;
}
list = list2;
}
/* here is the current bit position in the compressed input */
here = ((pos - strm.avail_in) << 3) - (strm.data_type & 63);
/* inflateMark() packs two values: the upper 16 bits go to back and the
lower 16 bits to left; an upper half of 0xffff marks an entry that is
not inside a coded block */
left = inflateMark(&strm);
back = left >> 16;
left &= 0xffff;
if ((back & 0xffff) == 0xffff) { /* signed shift not portable */
list[num].head = -1;
list[num].start = here;
}
else {
list[num].head = head;
list[num].start = here - back;
}
list[num].offset = left;
list[num].last = strm.data_type & 64;
num++;
} while (1);
(void)inflateEnd(&strm);
/* return results */
*index = list;
*number = num;
/* the uncompressed copy is handed over only if the caller asked for it */
if (data == NULL || length == NULL)
free(out);
else {
*data = out;
*length = strm.next_out - out;
}
return Z_OK;
}
/* return the offset of the start of deflate data -- return 0 on failure

   gz is rewound and its zlib or gzip header is run through inflate() until
   the first deflate block boundary is reported.  The file position of gz on
   return is unspecified (the caller is expected to seek).  Failure covers a
   read error, end of file inside the header, a corrupt header, and a zlib
   stream that asks for a preset dictionary. */
local off_t raw_start(FILE *gz)
{
    int ret;
    size_t got;
    off_t pos = 0;
    z_stream strm;
    unsigned char in[512];

    /* set up to decode zlib or gzip header */
    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.avail_in = 0;
    strm.next_in = Z_NULL;
    ret = inflateInit2(&strm, 47);
    if (ret != Z_OK)
        return 0;

    /* decode header -- no output space is offered, only the header is
       wanted */
    rewind(gz);
    strm.avail_out = 0;
    strm.next_out = in;
    do {
        /* refill the input buffer only when it has been used up */
        if (strm.avail_in == 0) {
            /* fread() returns a size_t count: zero covers both end of file
               and a read error, either of which means there is no complete
               header */
            got = fread(in, 1, sizeof(in), gz);
            if (got == 0) {
                (void)inflateEnd(&strm);
                return 0;
            }
            pos += (off_t)got;
            strm.avail_in = (unsigned)got;
            strm.next_in = in;
        }

        /* inflate() is called on every pass, not only after a refill, so
           leftover input is never left unprocessed while the loop spins */
        ret = inflate(&strm, Z_BLOCK);
        assert(ret != Z_STREAM_ERROR);

        /* anything other than Z_OK before the block boundary (corrupt data,
           out of memory, Z_NEED_DICT, no progress) cannot be recovered from
           here, and retrying would loop forever */
        if (ret != Z_OK) {
            (void)inflateEnd(&strm);
            return 0;
        }
    } while ((strm.data_type & 128) != 128);
    (void)inflateEnd(&strm);

    /* bytes read so far, less those inflate() has not consumed yet */
    return pos - strm.avail_in;
}
/* create an index for the file on the command line and test it */
int main(int argc, char **argv)
{
int ret;
FILE *gz;
off_t start;
struct point *index = NULL;
unsigned char *data = NULL;
size_t n, number = 0, length = 0;
z_stream strm;
unsigned char in[512], out[CHUNK];
/* check zlib version */
if (ZLIB_VERNUM < 0x1234 || ZLIB_VERNUM == 0x123f) {
fputs("gzindex needs zlib version 1.2.3.4 or later", stderr);
return 1;
}
/* set up input file, find start of deflate stream */
if (argc == 1) {
fputs("Usage: gzindex <gzipfile>\n", stderr);
return 1;
}
gz = fopen(argv[1], "rb");
if (gz == NULL) {
fprintf(stderr, "could not open %s\n", argv[1]);
return 1;
}
start = raw_start(gz);
/* create index */
ret = inflate_index(gz, start, &index, &number, &data, &length);
if (ret != Z_OK) {
fprintf(stderr, "indexing error %d\n", ret);
return 1;
}
/* set up raw inflate state for accessing entry points */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit2(&strm, -15);
if (ret != Z_OK) {
free(data);
free(index);
fprintf(stderr, "memory error %d\n", ret);
return 1;
}
/* test index in reverse order */
n = number;
while (n) {
n--;
/* go to entry point n */
ret = inflate_entry(&strm, gz, start, index, n, data);
if (ret != Z_OK) {
(void)inflateEnd(&strm);
free(data);
free(index);
fprintf(stderr, "entry error %d\n", ret);
return 1;
}
/* decompress CHUNK output bytes from here, or what's left */
strm.avail_out = sizeof(out);
strm.next_out = out;
do {
ret = fread(in, 1, sizeof(in), gz);
if (ret < 0) {
(void)inflateEnd(&strm);
free(data);
free(index);
fprintf(stderr, "read error %d\n", errno);
return 1;
}
strm.avail_in = (unsigned)ret;
strm.next_in = in;
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR);
if (ret == Z_DATA_ERROR || ret == Z_MEM_ERROR) {
(void)inflateEnd(&strm);
free(data);
free(index);
fprintf(stderr, "decompression error %d\n", ret);
return 1;
}
} while (strm.avail_out != 0 && ret != Z_STREAM_END);
/* verify the decompressed data */
if ((n == number - 1 &&
sizeof(out) - strm.avail_out != length - n * CHUNK) ||
(n < number - 1 && strm.avail_out != 0)) {
(void)inflateEnd(&strm);
free(data);
free(index);
fprintf(stderr, "decompression shortfall at entry %lu\n", n);
return 1;
}
if (memcmp(out, data + n * CHUNK, sizeof(out) - strm.avail_out) != 0) {
(void)inflateEnd(&strm);
free(data);
free(index);
fprintf(stderr, "compare error for entry %lu\n", n);
return 1;
}
}
/* clean up */
(void)inflateEnd(&strm);
free(data);
free(index);
fprintf(stderr, "%lu entry points generated and successfully tested\n",
number);
return 0;
}

Related

Socket Programming — recv() cannot get all data

I am learning socket programming in C language, and this is an incomprehensible problem I encountered during my study.
Today I am trying to send a HTTP request to my test server which host an Apache example website, then receive the response from test server. Here is a part of my receive code.
// Receive loop as posted in the question: it keeps calling read() on the
// socket until read() returns 0.
unsigned long recv_size = 0;
unsigned long response_size = 4096;
int ret = 0;
char *recv_buff = (char *)malloc(response_size);
while (1)
{
// ret = recv(socket, recv_buff, response_size, MSG_WAITALL); // cannot get all data
// NOTE(review): every call stores at the start of recv_buff, so each read
// overwrites the bytes delivered by the previous one.
ret = read(socket, recv_buff, response_size); // same effect as the above
// NOTE(review): the total is updated before ret is checked, so a failed
// read (ret < 0) is added to recv_size as well.
recv_size += ret;
if (ret < 0)
error(strerror(errno));
else if (ret == 0)
break; // all data recved
}
The normal result of my test with burpsuit is this.
But what I received with the C language program was incomplete data.
I spent a night searching for the reason, but I still have not found a solution to my problem. Whether I set the buffer to a very large size or try any other method, the complete data is never received.
The traffic monitored from wireshark is ok, but my program still cannot receive the complete data. What is the problem?
If you know why, please let me know. THX. (o゜▽゜)o☆
UPDATE
The while loop will execute twice, and first time the value of ret is 3343, and second time is 0, so the loop will stop here.
You can get a short read on a socket.
But, your code to handle that has a few issues.
You're allocating a buffer of size response_size. You are always reading that amount instead of reducing the amount read by the amount you've already read on a prior loop iteration.
This can cause you to read past the end of the buffer causing UB (undefined behavior).
Your termination condition is if (ret == 0). This can fail if another packet arrives "early". You'll never see a ret of 0, because the partial data from the next packet will make it non-zero
Here's the corrected code:
// Fill recv_buff with up to response_size bytes, appending each short read
// after the previous one instead of overwriting it.
#if 0
unsigned long recv_size = 0;
#endif
unsigned long response_size = 4096;
int ret = 0;
char *recv_buff = (char *) malloc(response_size);
#if 1
unsigned long remaining_size = response_size;
unsigned long offset = 0;
#endif
for (; remaining_size > 0; remaining_size -= ret, offset += ret) {
    ret = read(socket, &recv_buff[offset], remaining_size);
    if (ret < 0) {
        error(strerror(errno));
        // never fold a negative count into the offsets if error() returns
        break;
    }
    // read() returns 0 once the peer has closed the connection; without
    // this check neither counter changes and the loop never terminates
    if (ret == 0)
        break;
}
UPDATE:
The above code corrects some of the issues. But, for a variable length source [such as http], we don't know how much to read at the outset.
So, we have to parse the headers and look for the "Content-Length" field. This will tell us how much to read.
So, we'd like to have line oriented input for the headers. Or, manage our own buffer
Assuming we can parse that value, we have to wait for the empty line to denote the start of the payload. And, then we can loop on that exact amount.
Here's some code that attempts the header parsing and saving of the payload. I've coded it, but not compiled it. So, you can take it as pseudo code:
// Pseudo code (not compiled, per the author): read the response header one
// "\r\n"-terminated line at a time, then copy content_length payload bytes
// to file_fd.
// NOTE(review): content_length and file_fd are not declared in this
// fragment.
unsigned long recv_size = 0;
unsigned long response_size = 4096;
// one spare byte so the buffer can always be NUL-terminated for strstr()
char *recv_buff = malloc(response_size + 1);
// line oriented header buffer
char *endl = NULL;
unsigned long linelen;
char linebuf[1000];
int ret = 0;
// read headers
while (1) {
// fill up a chunk of data
while (recv_size < response_size) {
recv_buff[recv_size] = 0;
// do we have a line end?
endl = strstr(recv_buff,"\r\n");
if (endl != NULL)
break;
ret = read(socket, &recv_buff[recv_size], response_size - recv_size);
if (ret < 0)
error(strerror(errno));
if (ret == 0)
break;
recv_size += ret;
}
// error -- no line end but short read
if (endl == NULL)
error(strerror(errno));
// copy header to work buffer
// NOTE(review): linelen is not checked against sizeof(linebuf) before the
// copy.
linelen = endl - recv_buff;
memcpy(linebuf,recv_buff,linelen);
linebuf[linelen] = 0;
// remove header from receive buffer
linelen += 2;
recv_size -= linelen;
// NOTE(review): source and destination overlap here and below; memmove()
// is the call defined for overlapping ranges.
if (recv_size > 0)
memcpy(recv_buff,&recv_buff[linelen],recv_size);
// stop on end of headers (back to back "\r\n")
// NOTE(review): this test runs before the line just copied is parsed, so
// the header line directly in front of the blank line is skipped.
if ((recv_size >= 2) && (recv_buff[0] == '\r') && (recv_buff[1] == '\n')) {
memcpy(recv_buff,&recv_buff[2],recv_size - 2);
recv_size -= 2;
break;
}
// parse line work buffer for keywords ... (e.g.)
content_length = ...;
}
// save payload to file
while (content_length > 0) {
// write out prior payload amount
if (recv_size > 0) {
write(file_fd,recv_buff,recv_size);
content_length -= recv_size;
recv_size = 0;
continue;
}
// NOTE(review): recv_size is unsigned long, so the "< 0" test below can
// never be true for a failed read().
recv_size = read(socket,recv_buff,response_size);
if (recv_size < 0)
error(strerror(errno));
if (recv_size == 0)
break;
}
UPDATE #2:
Yeah, it's hard to make the pseudo code run, and the returned values are all garbled.
Okay, here is a soup-to-nuts working version that I've tested against my own http server.
I had to create my own routines for the parts you didn't post (e.g. connect, etc.).
At the core, there might have been a minor tweak to the buffer slide code [it was sliding by an extra 2 bytes in one place], but, otherwise it was pretty close to my previous version
// htprcv/htprcv.c -- HTTP receiver
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <error.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
// raw byte type, also used to index the debug option table
typedef unsigned char byte;
// HTPSLIDE -- drop the first _rmlen bytes of the receive buffer
// NOTE: expands to an assignment to the caller's local "recv_size" and reads
// the caller's local "recv_buff", so it only works where both are in scope
#define HTPSLIDE(_rmlen) \
recv_size = htpslide(recv_buff,recv_size,_rmlen)
// _dbgprt -- unconditional debug print to stderr
// (uses the GNU named variadic macro form "_fmt...")
#define _dbgprt(_fmt...) \
fprintf(stderr,_fmt)
// dbgprt/dbgexec -- print or execute only when debug level _lvl is enabled;
// both expand to empty statements unless DEBUG or _USE_ZPRT_ is non-zero
#if DEBUG || _USE_ZPRT_
#define dbgprt(_lvl,_fmt...) \
do { \
if (dbgok(_lvl)) \
_dbgprt(_fmt); \
} while (0)
#define dbgexec(_lvl,_expr) \
do { \
if (dbgok(_lvl)) \
_expr; \
} while (0)
#else
#define dbgprt(_lvl,_fmt...) \
do { \
} while (0)
#define dbgexec(_lvl,_expr) \
do { \
} while (0)
#endif
// dbgok -- the level token is stringified and its first character selects
// the flag, so dbgok(S) tests opt_d['S']
#define dbgok(_lvl) \
opt_d[(byte) #_lvl[0]]
// debug flags, one per character; set from the -d option in main
byte opt_d[256];
// output file name from the -o option (NULL if not given)
char *opt_o;
// number of bytes shown per hex dump line
#define HEXMAX 16
// htpconn -- do connect to server
//
// Resolves hostname (IPv4, TCP) and connects to the first address that
// accepts the connection on portno.
// RETURNS: connected socket descriptor (any failure is fatal via error())
int
htpconn(const char *hostname,unsigned short portno)
{
    struct addrinfo hints;
    struct addrinfo *res = NULL;
    struct addrinfo *ai;
    char portstr[20];
    int sockfd = -1;
    int err = 0;
    int rc;

    /* Prepare hint (socket address input). */
    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_INET;          // ipv4
    hints.ai_socktype = SOCK_STREAM;    // tcp

    snprintf(portstr, sizeof portstr, "%u", (unsigned) portno);

    // resolve the _remote_ host -- a NULL node with AI_PASSIVE would yield
    // the local wildcard address, which is only meaningful for bind()
    rc = getaddrinfo(hostname, portstr, &hints, &res);
    if (rc != 0)
        error(1,0,"htpconn: getaddrinfo -- %s: %s\n",
            hostname ? hostname : "(null)",gai_strerror(rc));

    // try each returned address until one connects
    for (ai = res; ai != NULL; ai = ai->ai_next) {
        sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (sockfd < 0) {
            err = errno;
            continue;
        }

        // do the actual connection
        if (connect(sockfd, ai->ai_addr, ai->ai_addrlen) == 0)
            break;

        // remember why it failed before close() can disturb errno
        err = errno;
        close(sockfd);
        sockfd = -1;
    }

    freeaddrinfo(res);

    if (sockfd < 0)
        error(1,err,"htpconn: connect -- %s:%s\n",
            hostname ? hostname : "(null)",portstr);

    return sockfd;
}
// htpslide -- slide buffer (strip out processed data)
//
// Removes the first slidelen bytes of recv_buff by moving the remaining
// bytes to the front.  A slidelen that is negative or larger than recv_size
// empties the buffer.
// RETURNS: number of bytes left in the buffer
size_t
htpslide(char *recv_buff,size_t recv_size,int slidelen)
{
    size_t cut;
    size_t new_size;

    // clamp the amount to what the buffer actually holds
    if ((slidelen < 0) || ((size_t) slidelen > recv_size))
        cut = recv_size;
    else
        cut = (size_t) slidelen;
    slidelen = (int) cut;

    new_size = recv_size - cut;

    dbgprt(S,"htpslide: slidelen=%d recv_size=%zu new_size=%zu\n",
        slidelen,recv_size,new_size);

    // source and destination overlap whenever more bytes remain than are
    // removed -- memcpy is undefined for that, memmove is not
    memmove(&recv_buff[0],&recv_buff[cut],new_size);

    return new_size;
}
// _htphex -- dump a line in hex
//
// Prints one dump line for the xlen bytes at vp: the offset off, up to
// HEXMAX bytes as hex in groups of four, then the same bytes as text with
// non-printable characters shown as '.'.  Positions past xlen are blank.
void
_htphex(unsigned long off,const void *vp,size_t xlen)
{
    const byte *bp = vp;
    size_t idx;
    int chr;
    char hexbuf[200];
    char alfbuf[200];
    char *hexptr = hexbuf;
    char *alfptr = alfbuf;

    for (idx = 0; idx < HEXMAX; ++idx) {
        if ((idx % 4) == 0)
            *hexptr++ = ' ';

        if (idx < xlen) {
            // only touch bytes the caller says exist -- the last line of a
            // dump is usually shorter than HEXMAX
            chr = bp[idx];
            hexptr += sprintf(hexptr,"%2.2X",chr);
            if ((chr < 0x20) || (chr > 0x7E))
                chr = '.';
        }
        else {
            // two blanks (same width as a hex byte) keep the text column
            // aligned on short lines
            hexptr += sprintf(hexptr,"  ");
            chr = ' ';
        }

        *alfptr++ = chr;
    }

    *hexptr = 0;
    *alfptr = 0;

    _dbgprt(" %8.8lX: %s *%s*\n",off,hexbuf,alfbuf);
}
// htphex -- dump a buffer in hex
//
// Prints an optional banner naming the caller (reason), then the buflen
// bytes at buf, HEXMAX bytes per line.
void
htphex(const char *buf,size_t buflen,const char *reason)
{
    size_t pos = 0;

    if (reason != NULL)
        _dbgprt("htphex: DUMP buf=%p buflen=%zu (from %s)\n",
            buf,buflen,reason);

    // hand out the buffer one line at a time
    while (buflen > 0) {
        size_t chunk = (buflen > HEXMAX) ? HEXMAX : buflen;

        _htphex(pos,buf,chunk);

        buflen -= chunk;
        buf += chunk;
        pos += chunk;
    }
}
// htpsym -- get symbol/value
//
// Splits a header line of the form "foo-bar: baz" at its first ':'.  The
// text before the colon is copied to sym, the text after it (leading blanks
// and tabs skipped) to val.  linebuf is modified: the colon is overwritten
// with a NUL.  sym and val must each be able to hold the whole line.
// RETURNS: 1=line had a colon, 0=no colon (sym and val untouched)
int
htpsym(char *linebuf,char *sym,char *val)
{
    char *colon;
    char *rhs;

    dbgprt(H,"htpsym: PARAM linebuf='%s'\n",linebuf);

    colon = strchr(linebuf,':');
    if (colon == NULL)
        return 0;

    // terminate the name in place, then copy it out
    *colon = 0;
    strcpy(sym,linebuf);

    // value starts after the colon and any blanks/tabs
    rhs = colon + 1;
    while ((*rhs == ' ') || (*rhs == '\t'))
        ++rhs;
    strcpy(val,rhs);

    dbgprt(H,"htpsym: SYMBOL sym='%s' val='%s'\n",sym,val);

    return 1;
}
// htprcv -- receive server response
//
// Reads the response header from sockfd one "\r\n"-terminated line at a
// time, picks up the Content-Length value, then copies that many body bytes
// to fdout.  Nothing is written when the server sends no Content-Length.
// Any read/write failure or a header line that does not fit the receive
// buffer is fatal (reported through error()).
void
htprcv(int sockfd,int fdout)
{
    enum { HTPBUFMAX = 4096 };
    size_t recv_size = 0;
    size_t response_size = HTPBUFMAX;
    char *recv_buff = malloc(response_size + 1);

    // line oriented header buffers -- all as large as the receive buffer so
    // no line (or part of a line) taken from it can overflow them
    char *endl = NULL;
    size_t linelen;
    char linebuf[HTPBUFMAX + 1];
    char sym[HTPBUFMAX + 1];
    char val[HTPBUFMAX + 1];

    ssize_t ret = 0;
    off_t content_length = 0;

    if (recv_buff == NULL)
        error(1,errno,"htprcv: malloc\n");

    // read headers
    while (1) {
        // fill up the buffer until it holds one complete line
        while (1) {
            recv_buff[recv_size] = 0;

            // do we have a line end?
            // (checked after every read, including the one that fills the
            // buffer)
            endl = strstr(recv_buff,"\r\n");
            if (endl != NULL)
                break;

            // buffer is full and still no line end
            if (recv_size >= response_size)
                break;

            // read a chunk of data
            ret = read(sockfd,&recv_buff[recv_size],response_size - recv_size);
            if (ret < 0)
                error(1,errno,"htprcv: read header\n");
            if (ret == 0)
                break;
            recv_size += ret;

            dbgprt(R,"htprcv: READ ret=%zd\n",ret);
            dbgexec(R,htphex(recv_buff,recv_size,"htprcv/READ"));
        }

        // error -- no line end but short read
        if (endl == NULL)
            error(1,0,"htprcv: no endl\n");

        // copy header to work buffer
        linelen = endl - recv_buff;
        memcpy(linebuf,recv_buff,linelen);
        linebuf[linelen] = 0;

        // remove header (and its "\r\n") from receive buffer
        HTPSLIDE(linelen + 2);

        // an empty line ends the headers -- testing the line itself (rather
        // than peeking at the bytes behind it) works no matter how the
        // server's data was split across reads, and lets the last header
        // line be parsed like all the others
        if (linelen == 0)
            break;

        // parse line work buffer for keywords ...
        if (! htpsym(linebuf,sym,val))
            continue;

        if (strcasecmp(sym,"Content-Length") == 0) {
            long long len = strtoll(val,NULL,10);

            content_length = (len > 0) ? (off_t) len : 0;
            continue;
        }
    }

    // save payload to file
    while (content_length > 0) {
        // write out prior payload amount
        if (recv_size > 0) {
            size_t chunk = recv_size;
            size_t done = 0;

            // never write past the end of the announced body
            if ((off_t) chunk > content_length)
                chunk = (size_t) content_length;

            dbgexec(W,htphex(recv_buff,chunk,"htprcv/WRITE"));

            // write() may accept fewer bytes than asked for
            while (done < chunk) {
                ret = write(fdout,&recv_buff[done],chunk - done);
                if (ret <= 0)
                    error(1,(ret < 0) ? errno : 0,"htprcv: write body\n");
                done += ret;
            }

            content_length -= chunk;
            recv_size = 0;
            continue;
        }

        // read in new chunk of payload (no more than is still expected)
        size_t want = response_size;

        if ((off_t) want > content_length)
            want = (size_t) content_length;

        ret = read(sockfd,recv_buff,want);
        if (ret < 0)
            error(1,errno,"htprcv: read body\n");
        if (ret == 0)
            break;
        recv_size = ret;
    }

    free(recv_buff);
}
// htpget -- do initial dialog
//
// Sends "GET <file> HTTP/1.1" with Host, User-Agent and Accept headers on
// sockfd.  file defaults to "/" and hostname to "localhost" when NULL.
// A request that does not fit the local buffer, or a write error, is fatal
// (reported through error()).
void
htpget(int sockfd,const char *hostname,const char *file)
{
    char buf[1024];
    int len;
    size_t resid;
    size_t off;
    ssize_t xlen;

    if (file == NULL)
        file = "/";
    if (hostname == NULL)
        hostname = "localhost";

    // build the whole request with one bounded format so an oversized host
    // or path is detected instead of overrunning buf
    len = snprintf(buf,sizeof(buf),
        "GET %s HTTP/1.1\r\n"
        "Host: %s\r\n"
        "User-Agent: %s\r\n"
        "Accept: */*\r\n"
        "\r\n",
        file,hostname,"htprcv");
    if ((len < 0) || ((size_t) len >= sizeof(buf)))
        error(1,0,"htpget: request too long\n");

    // send it all -- after a short write continue at the first unsent byte
    resid = (size_t) len;
    for (off = 0; resid > 0; resid -= xlen, off += xlen) {
        xlen = write(sockfd,&buf[off],resid);
        if (xlen < 0)
            error(1,errno,"htpget: write error\n");
    }
}
// main -- main program
//
// usage: htprcv [-d[SHRW]] [-o<file>] [host[:port]]
//   -d<letters>  enable the given debug classes (all of them if none given)
//   -o<file>     write the response body to <file> instead of stdout
// host defaults to localhost and port to 80.  The program requests "/" and
// saves the body.
int
main(int argc,char **argv)
{
    char *cp;
    char *portstr;
    char *end;
    long portval;
    unsigned short portno;
    int sockfd;
    int filefd;
    int len;
    char url[1000];

    --argc;
    ++argv;

    //setlinebuf(stdout);
    setlinebuf(stderr);

    for (; argc > 0; --argc, ++argv) {
        cp = *argv;
        if (*cp != '-')
            break;

        cp += 2;
        switch (cp[-1]) {
        case 'd':  // debug options
            if (*cp == 0)
                cp = "SHRW";
            for (; *cp != 0; ++cp)
                opt_d[(byte) *cp] = 1;
            break;

        case 'o':  // output file
            opt_o = cp;
            break;

        default:  // unknown options are ignored
            break;
        }
    }

    // get the remote host:port (bounded copy -- the argument is as long as
    // the user cares to make it)
    len = snprintf(url,sizeof(url),"%s",(argc > 0) ? *argv : "localhost:80");
    if ((len < 0) || ((size_t) len >= sizeof(url)))
        error(1,0,"main: host:port argument too long\n");

    // get remote port number
    portstr = strchr(url,':');
    if (portstr != NULL)
        *portstr++ = 0;
    else
        portstr = "80";

    errno = 0;
    portval = strtol(portstr,&end,10);
    if (errno || (end == portstr) || (*end != 0) ||
        (portval < 1) || (portval > 65535))
        error(1,0,"main: invalid port -- '%s'\n",portstr);
    portno = (unsigned short) portval;

    // open the output file (or send to stdout)
    filefd = 1;
    if (opt_o != NULL) {
        // truncate so a shorter body does not leave stale bytes from an
        // earlier run at the end of the file
        filefd = open(opt_o,O_WRONLY | O_CREAT | O_TRUNC,0644);
        if (filefd < 0) {
            error(0,errno,"main: unable to open '%s' -- using stdout\n",opt_o);
            filefd = 1;
        }
    }

    // establish connection
    sockfd = htpconn(url,portno);

    // send the file request (the Host header names the host we connected to)
    htpget(sockfd,url,"/");

    // receive the server response
    htprcv(sockfd,filefd);

    close(sockfd);
    if (filefd != 1)
        close(filefd);

    return 0;
}

Can the pagemap file of a process in Linux be read (64 bits per read) a finite number of times?

I'm trying to keep track of the number of writes per physical page using the file "/proc/PID/pagemap". But the file is binary, the size shown in the file properties is 0, and the following code reports a size of 0 as well.
struct stat buf;
int iRet = fstat(fd, &buf);
if(iRet == -1)
{
perror("fstat error");
exit(-1);
}
printf("the size of file is : %ld\n", buf.st_size);
I wrote a monitor program that reads a process's "pagemap" 64 bits at a time and records bit 55 (the soft-dirty bit) to check whether a page has been written. Of course, before doing this I cleared all soft-dirty bits in the process's pagemap. This method is provided by the Linux kernel. My question concerns reading the data from pagemap through a file descriptor (I also tried an fstream pointer): my read loop ends only when the process I'm monitoring finishes, as if the file were infinite. I know the process's logical address management is dynamic, but I want to know how I can count the writes properly. Should I read part of this infinite file at fixed time intervals? And how many entries should I read? T _ T.
You need something like the following:
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
/* One mapped address range of a process (one line of /proc/PID/maps),
   followed by the 64-bit pagemap entry of each of its pages.
   Regions form a singly linked list; each node is one malloc()ed block
   whose size depends on the number of pages. */
struct pagemap_region {
struct pagemap_region *next; /* Next region in the list, or NULL */
uintptr_t addr; /* First address within region */
uintptr_t ends; /* First address after region */
size_t pages; /* Number of pages in this region */
uint64_t page[]; /* 64-bit pagemap flags per page */
};
/* Release every region of a list built by get_pagemaps().
   Each node's bookkeeping fields are zeroed before the node is freed.
   Passing NULL is allowed and does nothing. */
static void free_pagemaps(struct pagemap_region *list)
{
    struct pagemap_region *node = list;

    while (node != NULL) {
        /* Fetch the successor first; the node is gone after free(). */
        struct pagemap_region *const following = node->next;

        node->addr = 0;
        node->ends = 0;
        node->pages = 0;
        free(node);

        node = following;
    }
}
/* Build a list describing every mapped region of process pid together with
   the pagemap entry of each page.

   pid > 0 selects that process; any other value selects the calling process
   (/proc/self).  The regions are taken from /proc/PID/maps and the per-page
   64-bit values from /proc/PID/pagemap.  Regions whose pagemap cannot be
   read (read returns end-of-file) get all-zero entries.

   Returns the list head (newest region first, i.e. reverse order of the
   maps file), or NULL with errno set on failure.  The caller releases the
   list with free_pagemaps(). */
struct pagemap_region *get_pagemaps(const pid_t pid)
{
    struct pagemap_region *list = NULL;
    long pagesize;
    size_t page;
    char *line_ptr = NULL;
    size_t line_max = 256;
    ssize_t line_len;
    FILE *maps;
    int n, fd;

    /* sysconf() reports failure as -1, which must not become a page size. */
    pagesize = sysconf(_SC_PAGESIZE);
    if (pagesize < 1) {
        errno = EINVAL;
        return NULL;
    }
    page = (size_t)pagesize;

    /* We reuse this for the input line buffer. */
    line_ptr = malloc(line_max);
    if (!line_ptr) {
        errno = ENOMEM;
        return NULL;
    }

    /* First, fill it with the path to the map pseudo-file. */
    if (pid > 0)
        n = snprintf(line_ptr, line_max, "/proc/%d/maps", (int)pid);
    else
        n = snprintf(line_ptr, line_max, "/proc/self/maps");
    if (n < 0 || (size_t)n + 1 >= line_max) {
        free(line_ptr);
        errno = EINVAL;
        return NULL;
    }

    /* Read the maps pseudo-file. */
    maps = fopen(line_ptr, "re"); /* Read-only, close-on-exec */
    if (!maps) {
        free(line_ptr);
        errno = ESRCH;
        return NULL;
    }

    while (1) {
        struct pagemap_region *curr;
        unsigned long addr, ends;
        size_t pages;
        char *ptr, *end;

        line_len = getline(&line_ptr, &line_max, maps);
        if (line_len < 0)
            break;

        /* Start address of the region. */
        end = ptr = line_ptr;
        errno = 0;
        addr = strtoul(ptr, &end, 16);
        if (errno || end == ptr || *end != '-')
            break;

        /* End address of the region. */
        ptr = ++end;
        errno = 0;
        ends = strtoul(ptr, &end, 16);
        if (errno || end == ptr || *end != ' ')
            break;

        /* Number of pages in the region; both ends must be page aligned. */
        pages = (ends - addr) / page;
        if (addr + page * pages != ends || (addr % page) != 0)
            break;

        /* Allocate new region map. */
        curr = malloc(sizeof (struct pagemap_region) + pages * sizeof curr->page[0]);
        if (!curr)
            break;
        curr->addr = addr;
        curr->ends = ends;
        curr->pages = pages;

        /* Prepend to the region list. */
        curr->next = list;
        list = curr;
    }

    /* Any issues when reading the maps pseudo-file?  Leaving the loop
       before end-of-file means a line could not be parsed or stored. */
    if (!feof(maps) || ferror(maps)) {
        fclose(maps);
        free(line_ptr);
        free_pagemaps(list);
        errno = EIO;
        return NULL;
    } else
    if (fclose(maps)) {
        free(line_ptr);
        free_pagemaps(list);
        errno = EIO;
        return NULL;
    }

    /* Reuse the line buffer for the pagemap pseudo-file path */
    if (pid > 0)
        n = snprintf(line_ptr, line_max, "/proc/%d/pagemap", (int)pid);
    else
        n = snprintf(line_ptr, line_max, "/proc/self/pagemap");
    if (n < 0 || (size_t)n + 1 >= line_max) {
        free(line_ptr);
        free_pagemaps(list);
        errno = ENOMEM;
        return NULL;
    }

    do {
        fd = open(line_ptr, O_RDONLY | O_NOCTTY | O_CLOEXEC);
    } while (fd == -1 && errno == EINTR);
    if (fd == -1) {
        /* Keep open()'s errno across the cleanup calls. */
        n = errno;
        free(line_ptr);
        free_pagemaps(list);
        errno = n;
        return NULL;
    }

    /* Path no longer needed. */
    free(line_ptr);
    line_ptr = NULL;
    line_max = 0;

    /* Read each pagemap section. */
    for (struct pagemap_region *curr = list; curr != NULL; curr = curr->next) {
        /* The pagemap file holds one 64-bit entry per virtual page, so the
           entry for an address lives at (address / page size) * 8. */
        off_t offset = (size_t)(curr->addr / page) * (sizeof curr->page[0]);
        unsigned char *ptr = (unsigned char *)&(curr->page[0]);
        size_t need = curr->pages * sizeof curr->page[0];
        ssize_t bytes;

        while (need > 0) {
            bytes = pread(fd, ptr, need, offset);
            if (bytes > 0) {
                /* Test the sign before comparing with the unsigned need:
                   a bare "bytes >= need" converts -1 to a huge value and
                   would take a failed read for a complete one. */
                if ((size_t)bytes >= need)
                    break;
                ptr += bytes;
                offset += bytes;
                need -= bytes;
            } else
            if (bytes == 0) {
                /* Assume this is a region we can't access, like [VSYSCALL]; clear the rest of the bits. */
                memset(ptr, 0, need);
                break;
            } else
            if (errno != EINTR) {
                close(fd);
                free_pagemaps(list);
                errno = EIO;
                return NULL;
            }
            /* Interrupted by a signal: retry the same read. */
        }
    }

    if (close(fd) == -1) {
        free_pagemaps(list);
        errno = EIO;
        return NULL;
    }

    return list;
}
/* Print a per-page map of the regions of the process named on the command
   line: 'R' for a page whose pagemap entry has bit 63 set, 'S' for one with
   bit 62 set, '.' otherwise.  A PID of -1 selects this process itself.
   Returns EXIT_SUCCESS, or EXIT_FAILURE on a bad PID or a read failure. */
int main(int argc, char *argv[])
{
    struct pagemap_region *list, *curr;
    long pid;
    char *end;

    if (argc != 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        /* The program's own name is argv[0]; argv[1] is the user's
           argument (or NULL when none was given). */
        const char *argv0 = (argc > 0 && argv && argv[0]) ? argv[0] : "(this)";

        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv0);
        fprintf(stderr, " %s PID\n", argv0);
        fprintf(stderr, "\n");
        fprintf(stderr, "This program prints the a map of the pages of process PID;\n");
        fprintf(stderr, "R for pages in RAM, S for pages in swap space, and . for others.\n");
        fprintf(stderr, "You can use -1 for the PID of this process itself.\n");
        fprintf(stderr, "\n");
        return EXIT_SUCCESS;
    }

    end = argv[1];
    errno = 0;
    pid = strtol(argv[1], &end, 10);
    if (errno || end == argv[1] || *end) {
        fprintf(stderr, "%s: Invalid PID.\n", argv[1]);
        return EXIT_FAILURE;
    }
    /* Besides -1, only positive values that survive the round trip through
       pid_t are accepted. */
    if (pid != -1 && (pid < 1 || (long)(pid_t)pid != pid)) {
        fprintf(stderr, "%s: Not a valid PID.\n", argv[1]);
        return EXIT_FAILURE;
    }

    list = get_pagemaps(pid);
    if (!list) {
        fprintf(stderr, "%s.\n", strerror(errno));
        return EXIT_FAILURE;
    }

    for (curr = list; curr != NULL; curr = curr->next) {
        printf("Region %p - %p: %zu pages\n", (void *)(curr->addr), (void *)(curr->ends), curr->pages);
        for (uint64_t *map = curr->page; map < curr->page + curr->pages; map++) {
            if ((*map >> 63) & 1)
                putchar('R');
            else
            if ((*map >> 62) & 1)
                putchar('S');
            else
                putchar('.');
        }
        putchar('\n');
    }

    free_pagemaps(list);
    return EXIT_SUCCESS;
}
We read /proc/PID/maps line by line, and construct a struct pagemap_region for each; this contains the start address, the end address, and the number of pages in the region. (I didn't bother to support huge pages, though; if you do, consider parsing /proc/PID/smaps instead. If a line begins with a 0-9 or lowercase a-f, it specifies an region; otherwise the line begins with a capital letter A-Z and specifies a property of that region.)
Each struct pagemap_region also contains room for the 64-bit pagemap value per page. After the regions have been found/chosen – this one tries all –, the /proc/PID/pagemap file is opened, and the corresponding data read from the proper location using pread(), which works like read(), but also takes the file offset as an extra parameter.
Not all regions are accessible. I do believe [VSYSCALL] is one of those, but being a kernel-userspace interface, its pagemap bits are uninteresting anyway. Instead of removing such regions from the list, the above just clears the bits to zero.
This is not intended as a "do it exactly like this, just copy and paste this" answer, but as a suggestion of how to start going about this, perhaps exploring a bit, comparing the results or behaviour to your particular needs; a sort of a rough outline for an initial suggestion only.
Also, as I wrote it in a single sitting, it's likely got nasty bugs in it. (If I knew where or for sure, I'd fix them; it's just that bugs happen.)

How can this function do "write" operation without writing in "Transmitter Register"?

Here's the code from rt_imx_uart.c :
static ssize_t rt_imx_uart_write(struct rtdm_fd *fd, const void *buf,
size_t nbyte)
{
struct rt_imx_uart_ctx *ctx;
rtdm_lockctx_t lock_ctx;
size_t written = 0;
int free;
int block;
int subblock;
int out_pos;
char *in_pos = (char *)buf;
rtdm_toseq_t timeout_seq;
ssize_t ret;
if (nbyte == 0)
return 0;
if (rtdm_fd_is_user(fd) && !rtdm_read_user_ok(fd, buf, nbyte))
return -EFAULT;
ctx = rtdm_fd_to_private(fd);
rtdm_toseq_init(&timeout_seq, ctx->config.rx_timeout);
/* Make write operation atomic. */
ret = rtdm_mutex_timedlock(&ctx->out_lock, ctx->config.rx_timeout,
&timeout_seq);
if (ret)
return ret;
while (nbyte > 0) {
rtdm_lock_get_irqsave(&ctx->lock, lock_ctx);
free = OUT_BUFFER_SIZE - ctx->out_npend;
if (free > 0) {
block = subblock = (nbyte <= free) ? nbyte : free;
out_pos = ctx->out_tail;
rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx);
/* Do we have to wrap around the buffer end? */
if (out_pos + subblock > OUT_BUFFER_SIZE) {
/* Treat the block between head and buffer
* end separately.
*/
subblock = OUT_BUFFER_SIZE - out_pos;
if (rtdm_fd_is_user(fd)) {
if (rtdm_copy_from_user
(fd,
&ctx->out_buf[out_pos],
in_pos, subblock) != 0) {
ret = -EFAULT;
break;
}
} else
memcpy(&ctx->out_buf[out_pos], in_pos,
subblock);
written += subblock;
in_pos += subblock;
subblock = block - subblock;
out_pos = 0;
}
if (rtdm_fd_is_user(fd)) {
if (rtdm_copy_from_user
(fd, &ctx->out_buf[out_pos],
in_pos, subblock) != 0) {
ret = -EFAULT;
break;
}
} else
memcpy(&ctx->out_buf[out_pos], in_pos, block);
written += subblock;
in_pos += subblock;
nbyte -= block;
rtdm_lock_get_irqsave(&ctx->lock, lock_ctx);
ctx->out_tail =
(ctx->out_tail + block) & (OUT_BUFFER_SIZE - 1);
ctx->out_npend += block;
ctx->ier_status |= IER_TX;
rt_imx_uart_start_tx(ctx);
rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx);
continue;
}
rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx);
ret = rtdm_event_timedwait(&ctx->out_event,
ctx->config.tx_timeout,
&timeout_seq);
if (ret < 0) {
if (ret == -EIDRM) {
/* Device has been closed -
* return immediately.
*/
ret = -EBADF;
}
break;
}
}
rtdm_mutex_unlock(&ctx->out_lock);
if ((written > 0) && ((ret == 0) || (ret == -EAGAIN) ||
(ret == -ETIMEDOUT)))
ret = written;
return ret;
}
I understand this function is meant to be used when a user-space program wants to write to the device. However, I don't understand how this function can do that, since nowhere in it do we ever write into the Transmitter Register. The start_tx function that it calls only enables a flag and that's all.
PS: here's the link for this driver: Link to the driver
It looks like the function puts the bytes in a buffer and enables the transmit interrupt. The interrupt service routine probably writes to the uart transmit register.

recv() on socket by dynamically allocating space

I'm trying to get the source code of my website using c, I'm able to connect and everything but when I implement the recv() code, it only receives the last few bytes of the source code. I'd like to dynamically allocate space for the buffer to receive more using the C functions malloc and realloc.
This is the code I have so far:
/* Attempt to read everything from cSocket into a heap buffer that should
 * grow on demand. status and cSocket are declared outside this excerpt.
 */
char *buffer = NULL;
/* i is not used anywhere in this excerpt. */
unsigned int i = 0;
unsigned long LEN = 200;
/* Starts at 0 even though LEN bytes are allocated just below. */
unsigned long cur_size = 0;
buffer = (char*)malloc(sizeof(char)*LEN);
do
{
/* status is tested here before the first recv() in this excerpt assigns it;
 * its initial value is not shown.
 */
if( status >= LEN )
{
cur_size += status;
/* The realloc() result overwrites buffer without a NULL check. */
buffer = (char*)realloc(buffer, cur_size);
}
/* Every call writes at the start of buffer, so each read replaces the
 * previous one instead of being appended.
 */
status = recv(cSocket, buffer, LEN, 0);
if( status == 0 )
{
printf("Bye\n");
}
else if( status > 0 )
{
printf("%d\n", status);
}
else
{
printf("socket error=%d\n", WSAGetLastError());
break;
}
}while( status > 0 );
/* %s needs a NUL-terminated string; nothing above writes a terminator. */
printf("%s\n", buffer);
It still doesn't print the whole source code. How should I go about this?
Pseudocode:
buffer = 'len chars';
loop:
if( status >= buffer ) buffer = 'resize to status chars';
status = recv(sock, buffer, len, 0);
end loop
Because you resize the buffer before each read, cur_size has to reflect the buffer's actual size, which is currently not the case: it starts at 0 although LEN bytes have already been allocated.
To fix this you could, for example, initialise cur_size with LEN by changing
unsigned long cur_size = 0;
to
unsigned long cur_size = LEN;
Assuming the fix above, you want to append to the buffer and not overwrite it with every call to recv().
To do so change this line
status = recv(cSocket, buffer, LEN, 0);
to be
status = recv(cSocket, buffer + cur_size - LEN, LEN, 0);
A more straightforward approach would be to track not the size of the buffer but the number of bytes received, and to always increase the buffer by a constant size.
Also the two calls to allocate memory can be replaced by one:
char *buffer = NULL;
unsigned long LEN = 200;
unsigned long bytes_received = 0;
unsigned long cur_size = 0;
int status = 0;
do
{
if (bytes_received >= cur_size)
{
char * tmp;
cur_size += LEN;
tmp = realloc(buffer, cur_size);
if (NULL == tmp)
{
fprintf(stderr, "realloc error=%d\n", WSAGetLastError());
break;
}
buffer = tmp;
}
status = recv(cSocket, buffer + bytes_received, LEN, 0);
if (status == 0)
{
printf("Bye\n");
}
else if (status > 0)
{
bytes_received += status;
printf("%d\n", status);
}
else /* < 0 */
{
fprintf(stderr, "socket error=%d\n", WSAGetLastError());
}
} while (status > 0);
printf("%s\n", buffer);
Well, after a bit of research, I came across this website and finally found what I was looking for.
Binary tides
Although it uses Linux's fcntl, the Windows equivalent is ioctlsocket, which is used to set the socket's blocking or non-blocking mode.
To see the exact function, visit the website. I modified the version and set my socket to blocking mode.
/*
 * Receive from socket s until recv() returns 0, appending every chunk to the
 * file "source.txt".
 *
 * Returns the total number of bytes received. A recv() failure or a file
 * error ends the loop early; the bytes counted up to that point are returned.
 */
int total_recv(SOCKET s)
{
    int size_recv = 0, total_size = 0;
    unsigned long block = 0;    /* FIONBIO argument; 0 selects blocking mode */
    char chunk[BUFLEN];

    // set mode to block
    // not necessary but clarification of function, mode is block by
    // default
    ioctlsocket(s, FIONBIO, &block);

    while (1)
    {
        size_recv = recv(s, chunk, BUFLEN, 0);
        if (size_recv == SOCKET_ERROR)
        {
            printf("Error receiving\n");
            break;  /* without this the loop would retry forever */
        }
        if (size_recv == 0)
        {
            break;
        }

        total_size += size_recv;

        // i used file since console wouldn't show full source code
        FILE *fp = fopen("source.txt", "a");
        if (fp == NULL)
        {
            printf("Error opening source.txt\n");
            break;
        }
        /*
         * Write exactly the bytes received. The data must not be used as a
         * printf format string, and a full BUFLEN read has no NUL terminator.
         */
        if (fwrite(chunk, 1, (size_t)size_recv, fp) != (size_t)size_recv)
        {
            printf("Error writing source.txt\n");
            fclose(fp);
            break;
        }
        fclose(fp);
    }
    return total_size;
}

Writing memory to socket in chunks in C

I'm attempting to write memory contents to a socket in chunks. I can write files that are smaller than my buffer, but anything else and I'm in deep water.
/* allocate memory for file contents */
/* NOTE(review): declared as a plain char, yet it receives malloc()'s result
 * and is used as a buffer below -- presumably char * was intended.
 */
char fileContents = malloc(sizeof(char)*filesize);
/* read a file into memory */
/* The return value of read() is not checked. */
read(fileDescriptor, fileContents , filesize);
/* Has no initializer, but is compared against filesize in the loop below. */
int chunksWritten;
/* Write the memory to socket? */
if (filesize > MAX_BLOCK_SIZE){
while (chunksWritten < filesize){
// what goes here?
}
} else {
chunksWritten = writen(sd, fileContents, filesize); // this works for files < MAX_BLOCK_SIZE
}
writen here writes to my socket:
/*
 * Send one length-prefixed block on fd.
 *
 * The block size is converted with htons() and sent first, one byte per
 * write() call, followed by the nbytes of payload from buf. Short writes of
 * the payload are retried until everything is out.
 *
 * Returns nbytes on success, -3 if nbytes exceeds MAX_BLOCK_SIZE, -1 if a
 * header byte could not be written, or the failing write() result (<= 0)
 * if the payload could not be written.
 */
int writen(int fd, char *buf, int nbytes) {
    short header = nbytes;
    char *header_bytes = (char *) &header;
    int sent = 0;
    int step;

    if (nbytes > MAX_BLOCK_SIZE) {
        return (-3);
    }

    header = htons(header);

    /* Two single-byte writes for the size prefix. */
    if (write(fd, header_bytes, 1) != 1) {
        return (-1);
    }
    if (write(fd, header_bytes + 1, 1) != 1) {
        return (-1);
    }

    /* send nbytes */
    while (sent < nbytes) {
        step = write(fd, buf + sent, nbytes - sent);
        if (step <= 0) {
            return (step);
        }
        sent += step;
    }
    return (sent);
}
This seems like it should be quite easy, but I'm struggling to find any good examples.
/*
 * Send fileContents (filesize bytes) to sd in pieces of at most
 * MAX_BLOCK_SIZE bytes, one writen() call per piece. chunksWritten ends up
 * holding the number of payload bytes writen() reported as sent.
 */
chunksWritten = 0;
int smaller;
int r;
int sizeRemaining = filesize;
while(sizeRemaining > 0){
    /* Size of this piece: a full block, or whatever is left. */
    if(sizeRemaining > MAX_BLOCK_SIZE){
        smaller = MAX_BLOCK_SIZE;
    } else {
        smaller = sizeRemaining;
    }
    /* Pass a pointer to the first unsent byte, not the byte's value. */
    r = writen(sd, fileContents + (filesize - sizeRemaining), smaller);
    if(r <= 0) {
        /* deal with error in a manner that fits the rest of your program */
        break; /* stop sending once a piece has failed */
    }
    chunksWritten = chunksWritten + r;
    sizeRemaining = sizeRemaining - r;
}
/*
Reminder: clean-up fileContents if you don't need it later on
*/
You certainly can rework the loop to count up instead of down. I can think better counting down.
Edit: made a few changes based on comments.
Not sure why you want this, but seems like you want something like:
#define MIN(x, y) ((x) < (y) ? (x) : (y))
/*
 * Send fileContents in successive writen() calls of at most MAX_BLOCK_SIZE
 * bytes each, advancing chunksWritten by what each call reports.
 * chunksWritten must be 0 before the loop starts.
 */
while (chunksWritten < filesize) {
    int writtenThisPass = writen(fd,
                                 fileContents + chunksWritten,
                                 MIN(filesize - chunksWritten, MAX_BLOCK_SIZE));
    if (writtenThisPass <= 0)
    {
        // TODO: handle the error
        /* chunksWritten did not advance, so looping again on a persistent
           failure would never terminate. */
        break;
    }
    else
    {
        chunksWritten += writtenThisPass;
    }
}

Resources