How to properly set up ALSA device - c

Edit: This question is different than the proposed duplicate because I'm asking How do you set the period/buffer size that will work with multiple targets each with different sound hardware?.
I have created some code that attempts to set up ALSA before playback of an OGG file. The code below works on one embedded Linux platform, but on another it fails with the following output:
Error setting buffersize.
Playback open error: Operation not permitted
I've included only the code that demonstrates the issue. setup_alsa() is not complete and won't completely configure an alsa device.
#include <alsa/asoundlib.h>
char *buffer;
static char *device = "default";
snd_pcm_uframes_t periodsize = 8192; /* Periodsize (bytes) */
int setup_alsa(snd_pcm_t *handle)
{
int rc;
int dir = 0;
snd_pcm_uframes_t periods; /* Number of fragments/periods */
snd_pcm_hw_params_t *params;
snd_pcm_sw_params_t *sw_params;
int rate = 44100;
int exact_rate;
int i = 0;
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(&params);
/* Fill it in with default values. */
if (snd_pcm_hw_params_any(handle, params) < 0)
{
fprintf(stderr, "Can not configure this PCM device.\n");
snd_pcm_close(handle);
return(-1);
}
/* Set number of periods. Periods used to be called fragments. */
periods = 4;
if ( snd_pcm_hw_params_set_periods(handle, params, periods, 0) < 0 )
{
fprintf(stderr, "Error setting periods.\n");
snd_pcm_close(handle);
return(-1);
}
/* Set buffer size (in frames). The resulting latency is given by */
/* latency = periodsize * periods / (rate * bytes_per_frame) */
if (snd_pcm_hw_params_set_buffer_size(handle, params, (periodsize * periods)>>2) < 0)
{
fprintf(stderr, "Error setting buffersize.\n");
snd_pcm_close(handle);
return(-1);
}
/* Write the parameters to the driver */
rc = snd_pcm_hw_params(handle, params);
if (rc < 0)
{
fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
snd_pcm_close(handle);
return -1;
}
snd_pcm_hw_params_free(params);
What is the normal way to setup ALSA that doesn't require a specific buffer/period size be set that provides smooth audio playback?**

As it turns out, I can program my ALSA setup routine to let ALSA determine what the nearest working period/buffer size is by using snd_pcm_hw_params_set_buffer_size_near() instead of snd_pcm_hw_params_set_buffer_size().
The following code now works on both platforms:
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <vorbis/vorbisfile.h>
#include <alsa/asoundlib.h>
char *buffer;
//static char *device = "default";
static char *device = "plughw:0,0";
snd_pcm_uframes_t periodsize = 4096; /* Periodsize (bytes) */
int setup_alsa(snd_pcm_t *handle)
{
int rc;
int dir = 0;
snd_pcm_uframes_t periods; /* Number of fragments/periods */
snd_pcm_hw_params_t *params;
snd_pcm_sw_params_t *sw_params;
int rate = 44100;
int exact_rate;
int i = 0;
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_malloc(&params);
/* Fill it in with default values. */
if (snd_pcm_hw_params_any(handle, params) < 0)
{
fprintf(stderr, "Can not configure this PCM device.\n");
snd_pcm_close(handle);
return(-1);
}
/* Set the desired hardware parameters. */
/* Non-Interleaved mode */
snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_NONINTERLEAVED);
snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
/* 44100 bits/second sampling rate (CD quality) */
/* Set sample rate. If the exact rate is not supported */
/* by the hardware, use nearest possible rate. */
exact_rate = rate;
if (snd_pcm_hw_params_set_rate_near(handle, params, &exact_rate, 0) < 0)
{
fprintf(stderr, "Error setting rate.\n");
snd_pcm_close(handle);
return(-1);
}
if (rate != exact_rate)
{
fprintf(stderr, "The rate %d Hz is not supported by your hardware.\n==> Using %d Hz instead.\n", rate, exact_rate);
}
/* Set number of channels to 1 */
if( snd_pcm_hw_params_set_channels(handle, params, 1 ) < 0 )
{
fprintf(stderr, "Error setting channels.\n");
snd_pcm_close(handle);
return(-1);
}
/* Set number of periods. Periods used to be called fragments. */
periods = 4;
if ( snd_pcm_hw_params_set_periods(handle, params, periods, 0) < 0 )
{
fprintf(stderr, "Error setting periods.\n");
snd_pcm_close(handle);
return(-1);
}
snd_pcm_uframes_t size = (periodsize * periods) >> 2;
if( (rc = snd_pcm_hw_params_set_buffer_size_near( handle, params, &size )) < 0)
{
fprintf(stderr, "Error setting buffersize: [%s]\n", snd_strerror(rc) );
snd_pcm_close(handle);
return(-1);
}
else
{
printf("Buffer size = %lu\n", (unsigned long)size);
}
/* Write the parameters to the driver */
rc = snd_pcm_hw_params(handle, params);
if (rc < 0)
{
fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
snd_pcm_close(handle);
return -1;
}
snd_pcm_hw_params_free(params);
/* Allocate a software parameters object. */
rc = snd_pcm_sw_params_malloc(&sw_params);
if( rc < 0 )
{
fprintf (stderr, "cannot allocate software parameters structure (%s)\n", snd_strerror(rc) );
return(-1);
}
rc = snd_pcm_sw_params_current(handle, sw_params);
if( rc < 0 )
{
fprintf (stderr, "cannot initialize software parameters structure (%s)\n", snd_strerror(rc) );
return(-1);
}
if((rc = snd_pcm_sw_params_set_avail_min(handle, sw_params, 1024)) < 0)
{
fprintf (stderr, "cannot set minimum available count (%s)\n", snd_strerror (rc));
return(-1);
}
rc = snd_pcm_sw_params_set_start_threshold(handle, sw_params, 1);
if( rc < 0 )
{
fprintf(stderr, "Error setting start threshold\n");
snd_pcm_close(handle);
return -1;
}
if((rc = snd_pcm_sw_params(handle, sw_params)) < 0)
{
fprintf (stderr, "cannot set software parameters (%s)\n", snd_strerror (rc));
return(-1);
}
snd_pcm_sw_params_free(sw_params);
return 0;
}
/* copied from libvorbis source */
int ov_fopen(const char *path, OggVorbis_File *vf)
{
int ret = 0;
FILE *f = fopen(path, "rb");
if( f )
{
ret = ov_open(f, vf, NULL, 0);
if( ret )
{
fclose(f);
}
}
else
{
ret = -1;
}
return( ret );
}
int main(int argc, char *argv[])
{
// sample rate * bytes per sample * channel count * seconds
//int bufferSize = 44100 * 2 * 1 * 2;
int err;
snd_pcm_t *handle;
snd_pcm_sframes_t frames;
buffer = (char *) malloc( periodsize );
if( buffer )
{
if((err = snd_pcm_open(&handle, "default", SND_PCM_STREAM_PLAYBACK, 0)) < 0)
{
printf("Playback open error #1: %s\n", snd_strerror(err));
exit(EXIT_FAILURE);
}
if(err = setup_alsa(handle))
{
printf("Playback open error #2: %s\n", snd_strerror(err));
exit(EXIT_FAILURE);
}
OggVorbis_File vf;
int eof = 0;
int current_section;
err = ov_fopen(argv[1], &vf);
if(err != 0)
{
perror("Error opening file");
}
else
{
vorbis_info *vi = ov_info(&vf, -1);
fprintf(stderr, "Bitstream is %d channel, %ldHz\n", vi->channels, vi->rate);
fprintf(stderr, "Encoded by: %s\n\n", ov_comment(&vf, -1)->vendor);
while(!eof)
{
long ret = ov_read(&vf, buffer, periodsize, 0, 2, 1, &current_section);
if(ret == 0)
{
/* EOF */
eof = 1;
}
else if(ret < 0)
{
/* error in the stream. */
fprintf( stderr, "ov_read error %l", ret );
}
else
{
frames = snd_pcm_writen(handle, (void *)&buffer, ret/2);
if(frames < 0)
{
printf("snd_pcm_writen failed: %s\n", snd_strerror(frames));
if( frames == -EPIPE )
{
snd_pcm_prepare(handle);
//frames = snd_pcm_writen(handle, (void *)&buffer, ret/2);
}
else
{
break;
}
}
}
}
ov_clear(&vf);
}
free( buffer );
snd_pcm_drain(handle);
snd_pcm_close(handle);
}
return 0;
}

Related

V4L2 VIDIOC_REQBUFS cannot allocate enough memory

I am writing a MIPI driver without using the I2C functionality, since it is not possible for me to use it. I am writing this for the Google Coral.
To write this new driver I looked at this example and adjusted it to only use the MMAP functionality.
My new code is this:
/*
* V4L2 video capture example
*
* This program can be used and distributed without restrictions.
*
* This program is provided with the V4L2 API
* see http://linuxtv.org/docs.php for more information
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <getopt.h> /* getopt_long() */
#include <fcntl.h> /* low-level i/o */
#include <unistd.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>
#define CLEAR(x) memset(&(x), 0, sizeof(x))
#ifndef V4L2_PIX_FMT_H264
#define V4L2_PIX_FMT_H264 v4l2_fourcc('H', '2', '6', '4') /* H264 with start codes */
#endif
struct buffer {
void *start;
size_t length;
};
static char *dev_name;
static int fd = -1;
struct buffer *buffers;
static unsigned int n_buffers;
static int out_buf;
static int force_format;
static int frame_count = 200;
static int frame_number = 0;
static void errno_exit(const char *s)
{
fprintf(stderr, "%s error %d, %s\n", s, errno, strerror(errno));
exit(EXIT_FAILURE);
}
static int xioctl(int fh, int request, void *arg)
{
int r;
do {
r = ioctl(fh, request, arg);
} while (-1 == r && EINTR == errno);
return r;
}
static void process_image(const void *p, int size)
{
printf("processing image\n");
frame_number++;
char filename[15];
sprintf(filename, "frame-%d.raw", frame_number); // filename becomes frame-x.raw
FILE *fp=fopen(filename,"wb");
if (out_buf)
fwrite(p, size, 1, fp); // write data to file fp
fflush(fp);
fclose(fp);
}
static int read_frame(void)
{
printf("reading frame\n");
struct v4l2_buffer buf;
unsigned int i;
CLEAR(buf);
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
if (-1 == xioctl(fd, VIDIOC_DQBUF, &buf)) {
switch (errno) {
case EAGAIN:
return 0;
case EIO:
/* Could ignore EIO, see spec. */
/* fall through */
default:
errno_exit("VIDIOC_DQBUF");
}
}
assert(buf.index < n_buffers);
process_image(buffers[buf.index].start, buf.bytesused);
if (-1 == xioctl(fd, VIDIOC_QBUF, &buf))
errno_exit("VIDIOC_QBUF");
return 1;
}
static void mainloop(void)
{
printf("mainloop\n");
unsigned int count;
count = frame_count;
while (count-- > 0) {
printf("count number = %d\n", count );
for (;;) {
fd_set fds;
struct timeval tv;
int r;
FD_ZERO(&fds); // clear file descriptor
FD_SET(fd, &fds); // set file descriptors to the descriptor fd
/* Timeout. */
tv.tv_sec = 2;
tv.tv_usec = 0;
r = select(fd + 1, &fds, NULL, NULL, &tv); // select uses a timeout, allows program to monitor file descriptors waiting untill files becomes "ready"
// returns the number of file descriptors changed. This maybe zero if timeout expires.
// probably watching reafds descriptor to change?
if (-1 == r) {
if (EINTR == errno)
continue;
errno_exit("select");
}
if (0 == r) {
fprintf(stderr, "select timeout\n");
exit(EXIT_FAILURE);
}
if (read_frame()) // if one of the descriptors is set, a frame can be read.
break;
/* EAGAIN - continue select loop. */
}
}
printf("mainloop ended\n");
}
static void stop_capturing(void)
{
printf("stop capturing\n");
enum v4l2_buf_type type;
type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
if (-1 == xioctl(fd, VIDIOC_STREAMOFF, &type))
errno_exit("VIDIOC_STREAMOFF");
printf("capturing stopped\n");
}
static void start_capturing(void)
{
printf("initiating capturing\n");
unsigned int i;
enum v4l2_buf_type type;
for (i = 0; i < n_buffers; ++i) {
struct v4l2_buffer buf;
CLEAR(buf);
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = i;
if (-1 == xioctl(fd, VIDIOC_QBUF, &buf))
errno_exit("VIDIOC_QBUF");
}
type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
if (-1 == xioctl(fd, VIDIOC_STREAMON, &type)){
errno_exit("VIDIOC_STREAMON");
}
printf("capturing initiated\n");
}
static void uninit_device(void)
{
unsigned int i;
for (i = 0; i < n_buffers; ++i){
if (-1 == munmap(buffers[i].start, buffers[i].length)){
errno_exit("munmap");
}
}
free(buffers);
}
static void init_mmap(void)
{
printf("initiating mmap buffer\n");
struct v4l2_requestbuffers req; //struct with details of the buffer to compose
CLEAR(req);
req.count = 4;
req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
req.memory = V4L2_MEMORY_MMAP;
if (-1 == xioctl(fd, VIDIOC_REQBUFS, &req)) { // initiate buffer
if (EINVAL == errno) {
fprintf(stderr, "%s does not support "
"memory mapping\n", dev_name);
exit(EXIT_FAILURE);
} else {
errno_exit("VIDIOC_REQBUFS");
}
}
printf("memory allocated\n");
if (req.count < 2) {
fprintf(stderr, "Insufficient buffer memory on %s\n",
dev_name);
exit(EXIT_FAILURE);
}
buffers = calloc(req.count, sizeof(*buffers)); // make the amount of buffers available
if (!buffers) {
fprintf(stderr, "Out of memory\n");
exit(EXIT_FAILURE);
}
for (n_buffers = 0; n_buffers < req.count; ++n_buffers) { // go through buffers and adjust struct in it
struct v4l2_buffer buf;
CLEAR(buf);
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = n_buffers;
if (-1 == xioctl(fd, VIDIOC_QUERYBUF, &buf))
errno_exit("VIDIOC_QUERYBUF");
buffers[n_buffers].length = buf.length;
buffers[n_buffers].start =
mmap(NULL /* start anywhere */,
buf.length,
PROT_READ | PROT_WRITE /* required */,
MAP_SHARED /* recommended */,
fd, buf.m.offset);
if (MAP_FAILED == buffers[n_buffers].start)
errno_exit("mmap");
}
printf("mmap buffer initiated\n");
}
static void init_device(void)
{
printf("initiating device\n");
struct v4l2_capability cap;
struct v4l2_cropcap cropcap;
struct v4l2_crop crop;
struct v4l2_format fmt;
unsigned int min;
if (-1 == xioctl(fd, VIDIOC_QUERYCAP, &cap)) { // gets information about driver and harware capabilities
if (EINVAL == errno) { // driver is not compatible with specifications
fprintf(stderr, "%s is no V4L2 device\n",
dev_name);
exit(EXIT_FAILURE);
} else {
errno_exit("VIDIOC_QUERYCAP");
}
}
if (!(cap.capabilities & V4L2_CAP_VIDEO_CAPTURE)) {
fprintf(stderr, "%s is no video capture device\n",
dev_name);
exit(EXIT_FAILURE);
}
if (!(cap.capabilities & V4L2_CAP_STREAMING)) {
fprintf(stderr, "%s does not support streaming i/o\n",
dev_name);
exit(EXIT_FAILURE);
}
/* Select video input, video standard and tune here. */
CLEAR(cropcap);
cropcap.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
if (0 == xioctl(fd, VIDIOC_CROPCAP, &cropcap)) { // used to get cropping limits, pixel aspects, ... fill in type field and get all this information back
crop.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
crop.c = cropcap.defrect; /* reset to default */
if (-1 == xioctl(fd, VIDIOC_S_CROP, &crop)) { // get cropping rectangle
switch (errno) {
case EINVAL:
printf("EINVAL in VIDIOC_S_CROP\n");
/* Cropping not supported. */
break;
default:
printf("other error in VIDIOC_S_CROP\n");
/* Errors ignored. */
break;
}
}
} else {
/* Errors ignored. */
}
CLEAR(fmt); // set the format of the v4l2 video
fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
if (force_format) {
fprintf(stderr, "Set H264\r\n");
fmt.fmt.pix.width = 640; //replace
fmt.fmt.pix.height = 480; //replace
fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_H264; //replace
fmt.fmt.pix.field = V4L2_FIELD_ANY;
if (-1 == xioctl(fd, VIDIOC_S_FMT, &fmt))
errno_exit("VIDIOC_S_FMT");
/* Note VIDIOC_S_FMT may change width and height. */
} else {
/* Preserve original settings as set by v4l2-ctl for example */
if (-1 == xioctl(fd, VIDIOC_G_FMT, &fmt))
errno_exit("VIDIOC_G_FMT");
}
/* Buggy driver paranoia. */
min = fmt.fmt.pix.width * 2;
if (fmt.fmt.pix.bytesperline < min)
fmt.fmt.pix.bytesperline = min;
min = fmt.fmt.pix.bytesperline * fmt.fmt.pix.height;
if (fmt.fmt.pix.sizeimage < min)
fmt.fmt.pix.sizeimage = min;
init_mmap();
printf("device inititiated\n");
}
static void close_device(void)
{
printf("closing device\n");
if (-1 == close(fd))
errno_exit("close");
fd = -1;
printf("device closed\n");
}
/*
struct stat {
dev_t st_dev; ID of device containing file
ino_t st_ino; Inode number
mode_t st_mode; File type and mode
nlink_t st_nlink; Number of hard links
uid_t st_uid; User ID of owner
gid_t st_gid; Group ID of owner
dev_t st_rdev; Device ID (if special file)
off_t st_size; Total size, in bytes
blksize_t st_blksize; Block size for filesystem I/O
blkcnt_t st_blocks; Number of 512B blocks allocated
Since Linux 2.6, the kernel supports nanosecond
precision for the following timestamp fields.
For the details before Linux 2.6, see NOTES.
struct timespec st_atim; Time of last access
struct timespec st_mtim; Time of last modification
struct timespec st_ctim; Time of last status change
#define st_atime st_atim.tv_sec Backward compatibility
#define st_mtime st_mtim.tv_sec
#define st_ctime st_ctim.tv_sec
};
*/
static void open_device(void)
{
printf("openening device\n");
struct stat st;
if (-1 == stat(dev_name, &st)) { // stat() returns info about file into struct
fprintf(stderr, "Cannot identify '%s': %d, %s\n",
dev_name, errno, strerror(errno));
exit(EXIT_FAILURE);
}
if (!S_ISCHR(st.st_mode)) {
fprintf(stderr, "%s is no device\n", dev_name);
exit(EXIT_FAILURE);
}
fd = open(dev_name, O_RDWR /* required */ | O_NONBLOCK, 0); // open the file dev/video0, returns a file descriptor
// if fd == -1, the file could not be opened.
if (-1 == fd) {
fprintf(stderr, "Cannot open '%s': %d, %s\n",
dev_name, errno, strerror(errno));
exit(EXIT_FAILURE);
}
printf("device opened\n");
}
int main(int argc, char **argv)
{
printf("main begins\n");
dev_name = "/dev/video0";
for (;;) {
printf("back here\n");
break;
}
open_device();
init_device();
start_capturing();
mainloop();
stop_capturing();
uninit_device();
close_device();
fprintf(stderr, "\n");
return 0;
}
However I get the following error:
VIDIOC_REQBUFS error 12, Cannot allocate memory
The entire output is:
main begins
back here
openening device
device opened
initiating device
initiating mmap buffer
VIDIOC_REQBUFS error 12, Cannot allocate memory
make: *** [makefile:3: all] Error 1
In the above code this is caused by:
if (-1 == xioctl(fd, VIDIOC_REQBUFS, &req)) { // initiate buffer
if (EINVAL == errno) {
fprintf(stderr, "%s does not support "
"memory mapping\n", dev_name);
exit(EXIT_FAILURE);
} else {
errno_exit("VIDIOC_REQBUFS");
}
}
Thus the ioctl(fd, VIDIOC_REQBUFS, &req) causes this error.
I have already looked on StackOverflow and found 1 other person with the same mistake.
He suggested to change CONFIG_CMA_SIZE_MBYTES to 32 from 16. I tried this by looking where I could find this setting. I found it in: boot/config-4.14.98-imx . However, it was already 320. (yes tenfold). I am now rather stuck on this. Is there a problem in my code, or do I need to change the setting from 320 to 32 (which seems counterintuitive).
With kind regards.

FFMPEG libx265 encoding leaves memory unfreed after avcodec_free_context

I am working on H265 encoding software and, in my unit tests, I have some weird memory leaks. To found them, I have modified the encode_video.c example from FFMPEG documentation. I have changed the resolution to correspond at a 4K video, I have adapted the bitrate and I have added a pause before context allocation and another one before the final return :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <libavcodec/avcodec.h>
#include <libavutil/opt.h>
#include <libavutil/imgutils.h>
static void encode(AVCodecContext *enc_ctx, AVFrame *frame, AVPacket *pkt,
FILE *outfile)
{
int ret;
/* send the frame to the encoder */
if (frame)
printf("Send frame %3"PRId64"\n", frame->pts);
ret = avcodec_send_frame(enc_ctx, frame);
if (ret < 0) {
fprintf(stderr, "Error sending a frame for encoding\n");
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_packet(enc_ctx, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0) {
fprintf(stderr, "Error during encoding\n");
exit(1);
}
printf("Write packet %3"PRId64" (size=%5d)\n", pkt->pts, pkt->size);
fwrite(pkt->data, 1, pkt->size, outfile);
av_packet_unref(pkt);
}
}
int main(int argc, char **argv)
{
const char *filename, *codec_name;
const AVCodec *codec;
AVCodecContext *c= NULL;
int i, ret, x, y;
FILE *f;
AVFrame *frame;
AVPacket *pkt;
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
if (argc <= 2) {
fprintf(stderr, "Usage: %s <output file> <codec name>\n", argv[0]);
exit(0);
}
filename = argv[1];
codec_name = argv[2];
sleep(10);
/* find the mpeg1video encoder */
codec = avcodec_find_encoder_by_name(codec_name);
if (!codec) {
fprintf(stderr, "Codec '%s' not found\n", codec_name);
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
pkt = av_packet_alloc();
if (!pkt)
exit(1);
/* put sample parameters */
c->bit_rate = 1000000;
/* resolution must be a multiple of two */
c->width = 3840;
c->height = 2160;
/* frames per second */
c->time_base = (AVRational){1, 25};
c->framerate = (AVRational){25, 1};
/* emit one intra frame every ten frames
* check frame pict_type before passing frame
* to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
* then gop_size is ignored and the output of encoder
* will always be I frame irrespective to gop_size
*/
c->gop_size = 10;
c->max_b_frames = 1;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if (codec->id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
/* open it */
ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
fprintf(stderr, "Could not open codec: %s\n", av_err2str(ret));
exit(1);
}
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
ret = av_frame_get_buffer(frame, 0);
if (ret < 0) {
fprintf(stderr, "Could not allocate the video frame data\n");
exit(1);
}
/* encode 1 second of video */
for (i = 0; i < 25; i++) {
fflush(stdout);
/* make sure the frame data is writable */
ret = av_frame_make_writable(frame);
if (ret < 0)
exit(1);
/* prepare a dummy image */
/* Y */
for (y = 0; y < c->height; y++) {
for (x = 0; x < c->width; x++) {
frame->data[0][y * frame->linesize[0] + x] = x + y + i * 3;
}
}
/* Cb and Cr */
for (y = 0; y < c->height/2; y++) {
for (x = 0; x < c->width/2; x++) {
frame->data[1][y * frame->linesize[1] + x] = 128 + y + i * 2;
frame->data[2][y * frame->linesize[2] + x] = 64 + x + i * 5;
}
}
frame->pts = i;
/* encode the image */
encode(c, frame, pkt, f);
}
/* flush the encoder */
encode(c, NULL, pkt, f);
/* add sequence end code to have a real MPEG file */
if (codec->id == AV_CODEC_ID_MPEG1VIDEO || codec->id == AV_CODEC_ID_MPEG2VIDEO)
fwrite(endcode, 1, sizeof(endcode), f);
fclose(f);
avcodec_free_context(&c);
av_frame_free(&frame);
av_packet_free(&pkt);
sleep(10);
return 0;
}
I was expecting that the RAM memory usage at the first pause is the same as the second pause but there is about 55 Mo of difference. If I increase the number of encoded frames, this difference up to 390 Mo. I have tested this code under Linux Mint LMDE 4 (roughly same as Debian 10).
I guess this memory "leak" it isn't a real memory leak but that it's some internal values used by libx265 to be maybe reused for another encoding. But has there a way to free this memory through FFMPEG API?

FFMPEG RTSP Server using muxing doc example

I am trying to develop RTSP server using FFMPEG. For that I slightly modified muxing file located at doc/example/ folder inside FFMPEG repository.
Giving my source code of RTSP server example:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <libavutil/avassert.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/mathematics.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#define STREAM_DURATION 10.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define SCALE_FLAGS SWS_BICUBIC
// a wrapper around a single output AVStream
typedef struct OutputStream {
AVStream *st;
AVCodecContext *enc;
/* pts of the next frame that will be generated */
int64_t next_pts;
int samples_count;
AVFrame *frame;
AVFrame *tmp_frame;
float t, tincr, tincr2;
struct SwsContext *sws_ctx;
struct SwrContext *swr_ctx;
} OutputStream;
static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
pkt->stream_index);
}
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, *time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
log_packet(fmt_ctx, pkt);
return av_interleaved_write_frame(fmt_ctx, pkt);
}
/* Add an output stream. */
static void add_stream(OutputStream *ost, AVFormatContext *oc,
AVCodec **codec,
enum AVCodecID codec_id)
{
AVCodecContext *c;
int i;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
ost->st = avformat_new_stream(oc, NULL);
if (!ost->st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
ost->st->id = oc->nb_streams-1;
c = avcodec_alloc_context3(*codec);
if (!c) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
ost->enc = c;
switch ((*codec)->type) {
case AVMEDIA_TYPE_AUDIO:
c->sample_fmt = (*codec)->sample_fmts ?
(*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
c->bit_rate = 64000;
c->sample_rate = 44100;
if ((*codec)->supported_samplerates) {
c->sample_rate = (*codec)->supported_samplerates[0];
for (i = 0; (*codec)->supported_samplerates[i]; i++) {
if ((*codec)->supported_samplerates[i] == 44100)
c->sample_rate = 44100;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
c->channel_layout = AV_CH_LAYOUT_STEREO;
if ((*codec)->channel_layouts) {
c->channel_layout = (*codec)->channel_layouts[0];
for (i = 0; (*codec)->channel_layouts[i]; i++) {
if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
c->channel_layout = AV_CH_LAYOUT_STEREO;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
ost->st->time_base = (AVRational){ 1, c->sample_rate };
break;
case AVMEDIA_TYPE_VIDEO:
c->codec_id = codec_id;
c->bit_rate = 400000;
/* Resolution must be a multiple of two. */
c->width = 352;
c->height = 288;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };
c->time_base = ost->st->time_base;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
/* just for testing, we also add B-frames */
c->max_b_frames = 2;
}
if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c->mb_decision = 2;
}
break;
default:
break;
}
/* Some formats want stream headers to be separate. */
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
/**************************************************************/
/* audio output */
static AVFrame *alloc_audio_frame(enum AVSampleFormat sample_fmt,
uint64_t channel_layout,
int sample_rate, int nb_samples)
{
AVFrame *frame = av_frame_alloc();
int ret;
if (!frame) {
fprintf(stderr, "Error allocating an audio frame\n");
exit(1);
}
frame->format = sample_fmt;
frame->channel_layout = channel_layout;
frame->sample_rate = sample_rate;
frame->nb_samples = nb_samples;
if (nb_samples) {
ret = av_frame_get_buffer(frame, 0);
if (ret < 0) {
fprintf(stderr, "Error allocating an audio buffer\n");
exit(1);
}
}
return frame;
}
static void open_audio(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
AVCodecContext *c;
int nb_samples;
int ret;
AVDictionary *opt = NULL;
c = ost->enc;
/* open it */
av_dict_copy(&opt, opt_arg, 0);
ret = avcodec_open2(c, codec, &opt);
av_dict_free(&opt);
if (ret < 0) {
fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));
exit(1);
}
/* init signal generator */
ost->t = 0;
ost->tincr = 2 * M_PI * 110.0 / c->sample_rate;
/* increment frequency by 110 Hz per second */
ost->tincr2 = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate;
if (c->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
nb_samples = 10000;
else
nb_samples = c->frame_size;
ost->frame = alloc_audio_frame(c->sample_fmt, c->channel_layout,
c->sample_rate, nb_samples);
ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, c->channel_layout,
c->sample_rate, nb_samples);
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(ost->st->codecpar, c);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
/* create resampler context */
ost->swr_ctx = swr_alloc();
if (!ost->swr_ctx) {
fprintf(stderr, "Could not allocate resampler context\n");
exit(1);
}
/* set options */
av_opt_set_int (ost->swr_ctx, "in_channel_count", c->channels, 0);
av_opt_set_int (ost->swr_ctx, "in_sample_rate", c->sample_rate, 0);
av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
av_opt_set_int (ost->swr_ctx, "out_channel_count", c->channels, 0);
av_opt_set_int (ost->swr_ctx, "out_sample_rate", c->sample_rate, 0);
av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt", c->sample_fmt, 0);
/* initialize the resampling context */
if ((ret = swr_init(ost->swr_ctx)) < 0) {
fprintf(stderr, "Failed to initialize the resampling context\n");
exit(1);
}
}
/* Prepare a 16 bit dummy audio frame of 'frame_size' samples and
* 'nb_channels' channels. */
static AVFrame *get_audio_frame(OutputStream *ost)
{
AVFrame *frame = ost->tmp_frame;
int j, i, v;
int16_t *q = (int16_t*)frame->data[0];
/* check if we want to generate more frames */
if (av_compare_ts(ost->next_pts, ost->enc->time_base,
STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
return NULL;
for (j = 0; j <frame->nb_samples; j++) {
v = (int)(sin(ost->t) * 10000);
for (i = 0; i < ost->enc->channels; i++)
*q++ = v;
ost->t += ost->tincr;
ost->tincr += ost->tincr2;
}
frame->pts = ost->next_pts;
ost->next_pts += frame->nb_samples;
return frame;
}
/*
* encode one audio frame and send it to the muxer
* return 1 when encoding is finished, 0 otherwise
*/
static int write_audio_frame(AVFormatContext *oc, OutputStream *ost)
{
AVCodecContext *c;
AVPacket pkt = { 0 }; // data and size must be 0;
AVFrame *frame;
int ret;
int got_packet;
int dst_nb_samples;
av_init_packet(&pkt);
c = ost->enc;
frame = get_audio_frame(ost);
if (frame) {
/* convert samples from native format to destination codec format, using the resampler */
/* compute destination number of samples */
dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples,
c->sample_rate, c->sample_rate, AV_ROUND_UP);
av_assert0(dst_nb_samples == frame->nb_samples);
/* when we pass a frame to the encoder, it may keep a reference to it
* internally;
* make sure we do not overwrite it here
*/
ret = av_frame_make_writable(ost->frame);
if (ret < 0)
exit(1);
/* convert to destination format */
ret = swr_convert(ost->swr_ctx,
ost->frame->data, dst_nb_samples,
(const uint8_t **)frame->data, frame->nb_samples);
if (ret < 0) {
fprintf(stderr, "Error while converting\n");
exit(1);
}
frame = ost->frame;
frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, c->sample_rate}, c->time_base);
ost->samples_count += dst_nb_samples;
}
ret = avcodec_encode_audio2(c, &pkt, frame, &got_packet);
if (ret < 0) {
fprintf(stderr, "Error encoding audio frame: %s\n", av_err2str(ret));
exit(1);
}
if (got_packet) {
ret = write_frame(oc, &c->time_base, ost->st, &pkt);
if (ret < 0) {
fprintf(stderr, "Error while writing audio frame: %s\n",
av_err2str(ret));
exit(1);
}
}
return (frame || got_packet) ? 0 : 1;
}
/**************************************************************/
/* video output */
static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *picture;
int ret;
picture = av_frame_alloc();
if (!picture)
return NULL;
picture->format = pix_fmt;
picture->width = width;
picture->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(picture, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return picture;
}
static void open_video(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
int ret;
AVCodecContext *c = ost->enc;
AVDictionary *opt = NULL;
av_dict_copy(&opt, opt_arg, 0);
/* open the codec */
ret = avcodec_open2(c, codec, &opt);
av_dict_free(&opt);
if (ret < 0) {
fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
exit(1);
}
/* allocate and init a re-usable frame */
ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
if (!ost->frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* If the output format is not YUV420P, then a temporary YUV420P
* picture is needed too. It is then converted to the required
* output format. */
ost->tmp_frame = NULL;
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height);
if (!ost->tmp_frame) {
fprintf(stderr, "Could not allocate temporary picture\n");
exit(1);
}
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(ost->st->codecpar, c);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVFrame *pict, int frame_index,
int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static AVFrame *get_video_frame(OutputStream *ost)
{
AVCodecContext *c = ost->enc;
/* check if we want to generate more frames */
if (av_compare_ts(ost->next_pts, c->time_base,
STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
return NULL;
/* when we pass a frame to the encoder, it may keep a reference to it
* internally; make sure we do not overwrite it here */
if (av_frame_make_writable(ost->frame) < 0)
exit(1);
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
/* as we only generate a YUV420P picture, we must convert it
* to the codec pixel format if needed */
if (!ost->sws_ctx) {
ost->sws_ctx = sws_getContext(c->width, c->height,
AV_PIX_FMT_YUV420P,
c->width, c->height,
c->pix_fmt,
SCALE_FLAGS, NULL, NULL, NULL);
if (!ost->sws_ctx) {
fprintf(stderr,
"Could not initialize the conversion context\n");
exit(1);
}
}
fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
sws_scale(ost->sws_ctx,
(const uint8_t * const *)ost->tmp_frame->data, ost->tmp_frame->linesize,
0, c->height, ost->frame->data, ost->frame->linesize);
} else {
fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
}
ost->frame->pts = ost->next_pts++;
return ost->frame;
}
/*
* encode one video frame and send it to the muxer
* return 1 when encoding is finished, 0 otherwise
*/
static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
{
int ret;
AVCodecContext *c;
AVFrame *frame;
int got_packet = 0;
AVPacket pkt = { 0 };
c = ost->enc;
frame = get_video_frame(ost);
av_init_packet(&pkt);
/* encode the image */
ret = avcodec_encode_video2(c, &pkt, frame, &got_packet);
if (ret < 0) {
fprintf(stderr, "Error encoding video frame: %s\n", av_err2str(ret));
exit(1);
}
if (got_packet) {
ret = write_frame(oc, &c->time_base, ost->st, &pkt);
} else {
ret = 0;
}
if (ret < 0) {
fprintf(stderr, "Error while writing video frame: %s\n", av_err2str(ret));
exit(1);
}
return (frame || got_packet) ? 0 : 1;
}
static void close_stream(AVFormatContext *oc, OutputStream *ost)
{
avcodec_free_context(&ost->enc);
av_frame_free(&ost->frame);
av_frame_free(&ost->tmp_frame);
sws_freeContext(ost->sws_ctx);
swr_free(&ost->swr_ctx);
}
/**************************************************************/
/* media file output */
int main(int argc, char **argv)
{
OutputStream video_st = { 0 }, audio_st = { 0 };
const char *filename;
AVOutputFormat *fmt;
AVFormatContext *oc;
AVCodec *audio_codec, *video_codec;
int ret;
int have_video = 0, have_audio = 0;
int encode_video = 0, encode_audio = 0;
AVDictionary *opt = NULL;
int i;
/* Initialize libavcodec, and register all codecs and formats. */
av_register_all();
avformat_network_init();
if (argc < 2) {
printf("usage: %s output_file\n"
"API example program to output a media file with libavformat.\n"
"This program generates a synthetic audio and video stream, encodes and\n"
"muxes them into a file named output_file.\n"
"The output format is automatically guessed according to the file extension.\n"
"Raw images can also be output by using '%%d' in the filename.\n"
"\n", argv[0]);
return 1;
}
filename = argv[1];
for (i = 2; i+1 < argc; i+=2) {
if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))
av_dict_set(&opt, argv[i]+1, argv[i+1], 0);
}
/* allocate the output media context */
avformat_alloc_output_context2(&oc, NULL, "rtsp", filename);
if (!oc) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&oc, NULL, "mpeg", filename);
}
if (!oc)
return 1;
fmt = oc->oformat;
/* Add the audio and video streams using the default format codecs
* and initialize the codecs. */
if (fmt->video_codec != AV_CODEC_ID_NONE) {
add_stream(&video_st, oc, &video_codec, fmt->video_codec);
have_video = 1;
encode_video = 1;
}
if (fmt->audio_codec != AV_CODEC_ID_NONE) {
add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
have_audio = 1;
encode_audio = 1;
}
/* Now that all the parameters are set, we can open the audio and
* video codecs and allocate the necessary encode buffers. */
if (have_video)
open_video(oc, video_codec, &video_st, opt);
if (have_audio)
open_audio(oc, audio_codec, &audio_st, opt);
av_dump_format(oc, 0, filename, 1);
/* open the output file, if needed */
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open '%s': %s\n", filename,
av_err2str(ret));
return 1;
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(oc, &opt);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file: %s\n",
av_err2str(ret));
return 1;
}
while (encode_video || encode_audio) {
/* select the stream to encode */
if (encode_video &&
(!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
encode_video = !write_video_frame(oc, &video_st);
} else {
encode_audio = !write_audio_frame(oc, &audio_st);
}
}
/* Write the trailer, if any. The trailer must be written before you
* close the CodecContexts open when you wrote the header; otherwise
* av_write_trailer() may try to use memory that was freed on
* av_codec_close(). */
av_write_trailer(oc);
/* Close each codec. */
if (have_video)
close_stream(oc, &video_st);
if (have_audio)
close_stream(oc, &audio_st);
if (!(fmt->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&oc->pb);
/* free the stream */
avformat_free_context(oc);
return 0;
}
After compiling it, I am running binary:
$ ./muxing rtsp://127.0.0.1/test
Output #0, rtsp, to 'rtsp://127.0.0.1/test':
Stream #0:0: Video: mpeg4, yuv420p, 352x288, q=2-31, 400 kb/s, 25 tbn
Stream #0:1: Audio: aac (LC), 44100 Hz, stereo, fltp, 64 kb/s
[tcp # 0x2b9d220] Connection to tcp://127.0.0.1:554?timeout=0 failed: Connection refused
Error occurred when opening output file: Connection refused
But getting Connection refused error,
I created this repository for rtsp server using ffserver code:
https://github.com/harshil1991/ffserver.git
Now I can able to integrate this source code in my existing repo.

How to open a .sw file using libav library in c language?

I created a .sw(16 bit pcm) file by passing an audio file. Now I am trying to get back the original audio(.mp3) by passing the .sw file as an input to the following file.
How can I read the .sw file content so that I can get back the mp3 file. Below is the code,
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/frame.h>
#include <libavutil/samplefmt.h>
/* check that a given sample format is supported by the encoder */
static int check_sample_fmt(const AVCodec *codec, enum AVSampleFormat sample_fmt)
{
const enum AVSampleFormat *p = codec->sample_fmts;
while (*p != AV_SAMPLE_FMT_NONE) {
if (*p == sample_fmt)
return 1;
p++;
}
return 0;
}
/* just pick the highest supported samplerate */
static int select_sample_rate(const AVCodec *codec)
{
const int *p;
int best_samplerate = 0;
if (!codec->supported_samplerates)
return 44100;
p = codec->supported_samplerates;
while (*p) {
if (!best_samplerate || abs(44100 - *p) < abs(44100 - best_samplerate))
best_samplerate = *p;
p++;
}
return best_samplerate;
}
/* select layout with the highest channel count */
static int select_channel_layout(const AVCodec *codec)
{
const uint64_t *p;
uint64_t best_ch_layout = 0;
int best_nb_channels = 0;
if (!codec->channel_layouts)
return AV_CH_LAYOUT_STEREO;
p = codec->channel_layouts;
while (*p) {
int nb_channels = av_get_channel_layout_nb_channels(*p);
if (nb_channels > best_nb_channels) {
best_ch_layout = *p;
best_nb_channels = nb_channels;
}
p++;
}
return best_ch_layout;
}
static void encode(AVCodecContext *ctx, AVFrame *frame, AVPacket *pkt,
FILE *output)
{
int ret;
/* send the frame for encoding */
ret = avcodec_send_frame(ctx, frame);
if (ret < 0) {
fprintf(stderr, "Error sending the frame to the encoder\n");
exit(1);
}
/* read all the available output packets (in general there may be any
* number of them */
while (ret >= 0) {
ret = avcodec_receive_packet(ctx, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0) {
fprintf(stderr, "Error encoding audio frame\n");
exit(1);
}
fwrite(pkt->data, 1, pkt->size, output);
av_packet_unref(pkt);
}
}
int main(int argc, char **argv)
{
const char *filename;
const AVCodec *codec;
AVCodecContext *c= NULL;
AVFrame *frame;
AVPacket *pkt;
int i, j, k, ret;
FILE *f;
uint16_t *samples;
float t, tincr;
av_register_all();
avcodec_register_all();
if (argc <= 1) {
fprintf(stderr, "Usage: %s <output file>\n", argv[0]);
return 0;
}
filename = argv[1];
/* find the MP2 encoder */
codec = avcodec_find_encoder(AV_CODEC_ID_MP3);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate audio codec context\n");
exit(1);
}
/* put sample parameters */
c->bit_rate = 64000;
/* check that the encoder supports s16 pcm input */
c->sample_fmt = AV_SAMPLE_FMT_S16P;
if (!check_sample_fmt(codec, c->sample_fmt)) {
fprintf(stderr, "Encoder does not support sample format %s",
av_get_sample_fmt_name(c->sample_fmt));
exit(1);
}
/* select other audio parameters supported by the encoder */
c->sample_rate = select_sample_rate(codec);
c->channel_layout = select_channel_layout(codec);
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
/* packet for holding encoded output */
pkt = av_packet_alloc();
if (!pkt) {
fprintf(stderr, "could not allocate the packet\n");
exit(1);
}
/* frame containing input raw audio */
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate audio frame\n");
exit(1);
}
frame->nb_samples = c->frame_size;
frame->format = c->sample_fmt;
frame->channel_layout = c->channel_layout;
/* allocate the data buffers */
ret = av_frame_get_buffer(frame, 0);
if (ret < 0) {
fprintf(stderr, "Could not allocate audio data buffers\n");
exit(1);
}
/* encode a single tone sound */
t = 0;
tincr = 2 * M_PI * 440.0 / c->sample_rate;
for (i = 0; i < 200; i++) {
/* make sure the frame is writable -- makes a copy if the encoder
* kept a reference internally */
ret = av_frame_make_writable(frame);
if (ret < 0)
exit(1);
samples = (uint16_t*)frame->data[0];
for (j = 0; j < c->frame_size; j++) {
samples[2*j] = (int)(sin(t) * 10000);
for (k = 1; k < c->channels; k++)
samples[2*j + k] = samples[2*j];
t += tincr;
}
encode(c, frame, pkt, f);
}
/* flush the encoder */
encode(c, NULL, pkt, f);
fclose(f);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&c);
return 0;
}
I just want to know, where and how the .sw file is reading in the above audio encoding code?

How to improve cmuSphinx's accuracy?

I want to use pocketShpinx to do some speech-to-text word. I have install sphinxbase and pocketSphinx. And download the acoustic model/langauge model/dictionary. Then I test the example code just like follows:
#include <pocketsphinx/pocketsphinx.h>
#include <stdio.h>
#include <stdlib.h>
#include "debug.h"
int main(int argc, char *argv[])
{
ps_decoder_t *ps;
cmd_ln_t *config;
FILE *fh;
int rv;
char const *hyp, *uttid;
int32 score;
config = cmd_ln_init(NULL, ps_args(), TRUE,
"-hmm", "/home/madper/speech/hub4opensrc.cd_continuous_8gau",
"-lm", "/home/madper/speech/language_model.arpaformat.DMP",
"-dict", "/home/madper/speech/cmudict/cmudict/sphinxdict/cmudict_SPHINX_40",
NULL);
if (config == NULL)
{
DBG (("cmd_ln_init() failed.\n"));
exit(1);
}
if ((ps = ps_init (config)) == NULL) /* init decoder */
{
DBG (("ps_init() failed.\n"));
exit(1 );
}
if ((fh = fopen("test.raw", "rb")) == NULL) /* open raw file */
{
DBG (("fopen() failed.\n"));
exit (1);
}
if ((rv = ps_decode_raw (ps, fh, "test", -1)) < 0 )
{
DBG (("ps_decode_raw() error!\n"));
exit (1);
}
if ((hyp = ps_get_hyp(ps, &score, &uttid)) == NULL)
{
DBG (("ps_get_hyp() failed!\n"));
exit (1);
}
printf ("Recognized: %s\n", hyp); /* this is what you say */
fclose(fh);
ps_free(ps);
return 0;
}
DBG is just a macro to print error message if defined DEBUG.
Then I write some code to record from mic use alsa. Like follows:
#define ALSA_PCM_NEW_HW_PARAMS_API
#include <alsa/asoundlib.h>
int main() {
long loops;
int rc;
int size;
snd_pcm_t *handle;
snd_pcm_hw_params_t *params;
unsigned int val;
int dir;
snd_pcm_uframes_t frames;
char *buffer;
/* Open PCM device for recording (capture). */
rc = snd_pcm_open(&handle, "default",
SND_PCM_STREAM_CAPTURE, 0);
if (rc < 0) {
fprintf(stderr,
"unable to open pcm device: %s\n",
snd_strerror(rc));
exit(1);
}
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(&params);
/* Fill it in with default values. */
snd_pcm_hw_params_any(handle, params);
/* Set the desired hardware parameters. */
/* Interleaved mode */
snd_pcm_hw_params_set_access(handle, params,
SND_PCM_ACCESS_RW_INTERLEAVED);
/* Signed 16-bit little-endian format */
snd_pcm_hw_params_set_format(handle, params,
SND_PCM_FORMAT_S16_LE);
/* Two channels (stereo) */
snd_pcm_hw_params_set_channels(handle, params, 1);
/* 44100 bits/second sampling rate (CD quality) */
val = 16000;
snd_pcm_hw_params_set_rate_near(handle, params,
&val, &dir);
/* Set period size to 32 frames. */
frames = 16;
snd_pcm_hw_params_set_period_size_near(handle,
params, &frames, &dir);
/* Write the parameters to the driver */
rc = snd_pcm_hw_params(handle, params);
if (rc < 0) {
fprintf(stderr,
"unable to set hw parameters: %s\n",
snd_strerror(rc));
exit(1);
}
/* Use a buffer large enough to hold one period */
snd_pcm_hw_params_get_period_size(params,
&frames, &dir);
size = frames * 2; /* 2 bytes/sample, 2 channels */
buffer = (char *) malloc(size);
/* We want to loop for 5 seconds */
snd_pcm_hw_params_get_period_time(params,
&val, &dir);
loops = 2000000 / val;
while (loops > 0) {
loops--;
rc = snd_pcm_readi(handle, buffer, frames);
if (rc == -EPIPE) {
/* EPIPE means overrun */
fprintf(stderr, "overrun occurred\n");
snd_pcm_prepare(handle);
} else if (rc < 0) {
fprintf(stderr,
"error from read: %s\n",
snd_strerror(rc));
} else if (rc != (int)frames) {
fprintf(stderr, "short read, read %d frames\n", rc);
}
rc = write(1, buffer, size);
if (rc != size)
fprintf(stderr,
"short write: wrote %d bytes\n", rc);
}
snd_pcm_drain(handle);
snd_pcm_close(handle);
free(buffer);
return 0;
}
So, I record a raw file. Then do speech-to-test on that file. But the accuracy is very vert poor. Just like hello or go home will give me hotel or MHM MHM and so on. So what's wrong with these code?
I have read the faqs, should I use acoustic model adaptation to improve accuracy?
PS. I change stereo to mono. And the sound is strange. I can't understand what I said. So, what's wrong with it? This is that raw file test.raw
If you look at the first Q and A in http://cmusphinx.sourceforge.net/wiki/faq you will notice that the library assumes mono data.
You record in stereo.

Resources