WAV File Synthesis From Scratch - C - c

Recently I saw a video lecture in my CS 101 class that inspired me to start playing with the WAV File Format in C. My project today has been creating sounds using a simple mathematical sine function. Despite a couple obstacles, my program can now accept several inputs(frequencies of waves, amplitudes of waves, sampling rate, etc.) and create a wav file containing the specified pitches.
However, when playing these tones on my computer speakers, there is a strange, rhythmic popping sound, which varies with the sampling rate. At higher sampling rates, the frequency of the popping sound increases and turns into an annoying whining sound.
The strange part is that the popping sound is consistent across different computers with the same file.
Below I will post the code that I use to generate the WAV file. Any insights into what might be causing this phenomenon will be appreciated. It's probably just a stupid mistake on my part somewhere. :)
#include <stdio.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <string.h>
#include <math.h>
// RIFF container header; main() places it at the very start of the output buffer.
struct WAVHeader {
// Chunk tag; main() stores "RIFF" here.
char ChunkID[4];
// main() sets this to the total buffer size minus 8.
uint32_t ChunkSize;
// Form type tag; main() stores "WAVE" here.
char RIFFType[4];
};
// Format chunk; main() places it directly after the WAVHeader.
struct FormatHeader {
// Chunk tag; main() stores "fmt " here.
char ChunkID[4];
// main() sets this to 16.
uint32_t ChunkSize;
// main() sets this to 1.
uint16_t CompressionCode;
// main() sets this to 1 (a single channel).
uint16_t Channels;
// Samples per second, taken from the -r option.
uint32_t SampleRate;
// main() sets this to BlockAlign * samplerate.
uint32_t AvgBytesPerSec;
// main() sets this to 2 (bytes per sample frame).
uint16_t BlockAlign;
// main() sets this to 16 (bits per sample).
uint16_t SigBitsPerSamp;
};
// Data chunk header; the 16-bit samples follow it immediately in the buffer.
struct DataHeader {
// Chunk tag; main() stores "data" here.
char ChunkID[4];
// Size of the sample data in bytes; main() sets it to length * samplerate * 2.
uint32_t ChunkSize;
};
// Tone generator entry point: parses the -l/-s/-o/-r options, builds a complete
// WAV image (three headers plus 16-bit samples) in one heap buffer and writes
// that buffer to the output file.
// NOTE(review): `void main` is not a standard signature; `int main` is expected.
// NOTE(review): malloc/atoi need <stdlib.h> and uint32_t needs <stdint.h>; neither is included directly above.
void main(int argc, char * argv[]) {
//Check for valid number of arguments or display help
if(argc < 8) {
printf("Usage:\n./Tone -l [length] -s [frequency] [amplitude] -o [output-file] -r [sample-rate]\n");
printf("-l length of tone to produce in seconds\n");
printf("-s Creates sine wave. Can be used multiple times. Frequency (Hz) and amplitude (0 - 32767) of each tone. \n");
printf("-o File to write to\n");
printf("-r samples per second (kHz). Note: Must be double highest frequency in tone.\n");
return;
}
//Organize arguments
// sinf[k] / sina[k] hold the frequency and amplitude of the k-th -s tone (at most 10).
// NOTE(review): length and samplerate stay uninitialized when -l / -r are absent.
int length, sinf[10], sina[10], samplerate;
memset(sinf, 0, sizeof(int) * 10);
memset(sina, 0, sizeof(int) * 10);
char * output = NULL;
// i counts the -s tones stored so far.
int i = 0;
int count;
for(count = 1; count < argc; count++){
char first = *argv[count];
int second = *(argv[count] + 1);
if (first == '-') {
// NOTE(review): the option values argv[count+1] / argv[count+2] are read without
// checking count against argc, and i is not checked against the 10-entry arrays.
switch (second) {
case 's':
sinf[i] = atoi(argv[count+1]);
sina[i] = atoi(argv[count+2]);
i++;
break;
case 'l':
length = atoi(argv[count+1]);
break;
case 'o':
output = argv[count+1];
break;
case 'r':
// -r is given in kHz (see the usage text), hence the * 1000.
samplerate = atoi(argv[count+1]) * 1000;
break;
}
}
}
//Allocate memory for wav file
// Three headers followed by length * samplerate samples of 2 bytes each.
size_t size = sizeof(struct WAVHeader) + sizeof(struct FormatHeader) + sizeof(struct DataHeader) + (length * samplerate * 2);
// NOTE(review): the malloc result is used without a NULL check.
void * buffer = malloc(size);
//Fill buffer with headers
// The three headers are laid out back to back: each pointer is "one past" the previous struct.
struct WAVHeader * WAV = (struct WAVHeader *)buffer;
struct FormatHeader * Format = (struct FormatHeader *)(WAV + 1);
struct DataHeader * Data = (struct DataHeader *)(Format + 1);
// NOTE(review): each strcpy writes 5 bytes (tag + NUL) into a 4-byte tag field; the
// NUL lands in the following field and is only harmless because that field is
// assigned afterwards.
strcpy(WAV->ChunkID, "RIFF");
WAV->ChunkSize = (uint32_t)size - 8;
strcpy(WAV->RIFFType, "WAVE");
strcpy(Format->ChunkID, "fmt ");
Format->ChunkSize = 16;
Format->CompressionCode = 1;
Format->Channels = 1;
Format->SampleRate = (uint32_t)samplerate;
Format->SigBitsPerSamp = 16;
Format->BlockAlign = 2;
Format->AvgBytesPerSec = Format->BlockAlign * samplerate;
strcpy(Data->ChunkID, "data");
Data->ChunkSize = length * samplerate * 2;
//Generate Sound
printf("Generating sound...\n");
// Samples start right after the data header.
short * sound = (short *)(Data + 1);
short total;
float time;
float increment = 1.0/(float)samplerate;
// Steps a float time accumulator by 1/samplerate; the sample slot written on each
// pass is derived from time * samplerate (the statement the answer below singles out).
for (time = 0; time < length; time += increment){
total = 0;
// Sum all 10 tone slots; unused slots have amplitude 0 and contribute nothing.
// NOTE(review): total is a short, so the sum of several loud tones can exceed its range.
for (i = 0; i < 10; i++) {
total += sina[i] * sin((float)sinf[i] * time * (2 * 3.1415926));
}
*(sound + (int)(time * samplerate)) = total;
//printf("Time: %f Value: %hd\n", time, total);
}
//Write buffer to file
// NOTE(review): "w" opens the file in text mode; the fopen/fwrite results are not
// checked, and neither the stream nor the buffer is released.
FILE * out = fopen(output, "w");
fwrite(buffer, size, 1, out);
printf("Wrote to %s\n", output);
return;
}

I think this is your core problem:
*(sound + (int)(time * samplerate)) = total;
I suspect that (time*samplerate) doesn't always increase on integer boundaries due to floating point rounding errors. Hence, some sample positions are skipped and/or overwritten due to rounding errors. That's just a guess.
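But also, as "time" increases, the product "time * frequency * 2PI" keeps growing, and a float (roughly 7 significant decimal digits) represents it less and less precisely, so the phase passed to sin() becomes inaccurate. So you should normalize "time" such that it doesn't increase forever.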
In any case, I validated this modified loop works (and sounds) just fine:
/* Replacement for the "Generate Sound" loop: samples are indexed by an integer
 * counter instead of an accumulated float time, so every slot of sound[] is
 * written exactly once. Uses length, samplerate, sina[], sinf[] and sound from
 * the surrounding main(). */
const double TWOPI = 6.283185307179586;
unsigned int sample_count = length * samplerate;
for (unsigned int i = 0; i < sample_count; i++)
{
    // Position inside the current second. sinf[] holds whole-Hz values, so each
    // tone completes a whole number of cycles per second and the wrap is seamless.
    unsigned int j = i % samplerate;
    double f = 0.0;
    int result;
    for (int x = 0; x < 10; x++)
    {
        // The phase is computed in double: sinf[x] * j can exceed 2^24, beyond
        // which a float can no longer represent every integer exactly.
        f += sina[x] * sin(((double)sinf[x] * j * TWOPI) / samplerate);
    }
    result = (long)f;
    //clamp to 16-bit
    if (result > 32767)
    {
        result = 32767;
    }
    else if (result < -32768)
    {
        result = -32768;
    }
    sound[i] = (short)result;
}

Related

What's wrong with my PNG IDAT CHUNK datas?

I'm trying, for learning purposes, to create a PNG file manually with OpenGL.
All the other chunks are OK (IHDR, pHYs, IEND).
firstly, I read pixels by Opengl :
// Use the GLUT-reported screen size as the capture size.
int s_width = glutGet(GLUT_SCREEN_WIDTH), s_height = glutGet(GLUT_SCREEN_HEIGHT);
// One byte per channel, _glColorChannels channels per pixel.
// NOTE(review): the read below requests GL_RGB, so this size only matches if _glColorChannels is 3 — confirm.
int pixelArraySize = s_width*s_height*_glColorChannels;
unsigned char *pixelsArrayInfo = (unsigned char*)malloc(pixelArraySize);
// Pack alignment 1: presumably so that rows are stored without padding bytes.
glPixelStorei(GL_PACK_ALIGNMENT, 1);
// Read from the front buffer, starting at window coordinate (0, 0).
glReadBuffer(GL_FRONT);
glReadPixels(0, 0, (unsigned short)s_width, (unsigned short)s_height, GL_RGB, GL_UNSIGNED_BYTE, pixelsArrayInfo);
then, I created a function of generating scanlines like this:
"each scanline is an array of RGB values in one screen line preceded by '0' "
// Question version: intended to build one scanline per image row, each being a
// leading 0 byte followed by the row's RGB bytes. Returns a malloc'ed buffer of
// s_height * (1 + s_width*3) bytes.
unsigned char *generateScanlines(unsigned char *pixels, int s_width, int s_height)
{
int eachScanlineLength = 1 + s_width*3;
unsigned char *finalOutput = (unsigned char*)malloc(s_height*eachScanlineLength);
for(int i=0; i<s_height; i++){
// Leading 0 byte of scanline i.
finalOutput[i*eachScanlineLength] = 0;
// NOTE(review): the source range passed here is expressed in scanline units
// (1 + s_width*3 bytes per row), although pixels holds only s_width*3 bytes per
// row; it also copies eachScanlineLength bytes into the eachScanlineLength - 1
// bytes that remain after the leading 0, so each row runs one byte into the next.
copyElement(finalOutput, pixels, i*eachScanlineLength, (i+1)*eachScanlineLength, i*eachScanlineLength+1);
}
return finalOutput;
}
// Copies the bytes src[src_debut] .. src[src_fin - 1] into dest, starting at
// dest[dest_debut]. Bytes are moved one at a time in ascending order; nothing is
// copied when src_fin <= src_debut.
void copyElement(unsigned char *dest, unsigned char *src, int src_debut, int src_fin, int dest_debut)
{
    int from = src_debut;
    int to = dest_debut;

    while (from < src_fin) {
        dest[to] = src[from];
        ++from;
        ++to;
    }
}
// Question version: builds the scanlines, reverses them with invertArray() and runs
// them through zlib's deflate. Returns the malloc'ed output buffer and stores a
// length in *deflatedDataLength.
unsigned char *deflateDatas(unsigned char *pixels, int s_width, int s_height, int *deflatedDataLength)
{
unsigned char *deflated = (unsigned char*)malloc(compressBound(s_height*(1 + s_width*3)));
// NOTE(review): neither the buffer returned by generateScanlines() nor the one
// returned by invertArray() is freed anywhere in this function.
unsigned char *scanlines = invertArray(generateScanlines(pixels, s_width, s_height), s_height*(1 + s_width*3));
z_stream defstream;
defstream.zalloc = Z_NULL;
defstream.zfree = Z_NULL;
defstream.opaque = Z_NULL;
defstream.avail_in = (uInt)(s_height*(1 + s_width*3));
defstream.next_in = (Bytef *)scanlines;
defstream.avail_out = (uInt)(compressBound(s_height*(1 + s_width*3)));
defstream.next_out = (Bytef *)deflated;
// Level 0: the answer below points out that this means "no compression".
deflateInit(&defstream, 0);
deflate(&defstream, Z_FINISH);
deflateEnd(&defstream);
// NOTE(review): this stores the upper-bound estimate, not the number of bytes
// deflate actually produced (see the answer below).
*deflatedDataLength = compressBound(s_height*(1 + s_width*3));
return deflated;
}
then, it seem it work, but when I test it my OpenGL program I get this :
[small png output][1]
also, i created a basic bmp File and it work perfectly
i try to find if it's any error, maybe it's in scanlines generation or misunderstanding with the PNG file format.
the invertArray() code :
// Returns a newly malloc'ed copy of the first arrayEnd bytes of myArray in reverse
// order (element 0 of the result is myArray[arrayEnd - 1]). The caller owns the
// result and must free() it. Returns NULL when myArray is NULL, arrayEnd is not
// positive, or the allocation fails.
unsigned char *invertArray(unsigned char *myArray, int arrayEnd)
{
    if (myArray == NULL || arrayEnd <= 0)
        return NULL;

    unsigned char *invertedtableau = (unsigned char *)malloc((size_t)arrayEnd);
    if (invertedtableau == NULL)
        return NULL;

    // Valid indices are 0 .. arrayEnd - 1 on both sides; the mirror of i is
    // arrayEnd - 1 - i (the previous "<=" / "arrayEnd - i" form touched index
    // arrayEnd, one past the end of both buffers).
    for (int i = 0; i < arrayEnd; i++)
        invertedtableau[i] = myArray[arrayEnd - 1 - i];

    return invertedtableau;
}
SOLUTION
I found where the error came from: according to Mark Adler, the scanline generation method was wrong.
Also, the file was inverted because OpenGL uses a bottom-left origin while PNG is a top-left format, so we need to flip the pixel buffer before generating the scanlines (which is what I tried to do with the invertArray() method).
The last error was that the call to deflate, and the way the deflated length was stored, were also wrong.
the whole deflating code :
// generating scanline function
// Builds the uncompressed PNG image data: for each of the s_height rows, one
// leading 0 byte (PNG filter type 0, "None") followed by that row's
// s_width * colorChannel pixel bytes, taken from pixels in row order.
//
// Returns a malloc'ed buffer of s_height * (1 + s_width * colorChannel) bytes that
// the caller must free(), or NULL if pixels is NULL, s_height is not positive,
// s_width or colorChannel is negative, or the allocation fails.
unsigned char *generateScanlines(unsigned char *pixels, int s_width, int s_height, int colorChannel)
{
    if (pixels == NULL || s_width < 0 || s_height <= 0 || colorChannel < 0)
        return NULL;

    // Sizes are computed in size_t so large images do not overflow int.
    size_t rowBytes = (size_t)s_width * (size_t)colorChannel;
    size_t scanlineLength = rowBytes + 1;

    unsigned char *scanlines = (unsigned char *)malloc((size_t)s_height * scanlineLength);
    if (scanlines == NULL)
        return NULL;

    // Every row, including the last one, is handled by the same loop iteration.
    for (int row = 0; row < s_height; row++) {
        unsigned char *line = scanlines + (size_t)row * scanlineLength;
        line[0] = 0; // filter type byte
        memcpy(line + 1, pixels + (size_t)row * rowBytes, rowBytes);
    }
    return scanlines;
}
// deflating IDAT CHUNK data algorithm
unsigned char *deflateDatas(unsigned char *pixels, int s_width, int s_height, int colorChannel, int *deflatedLen)
{
unsigned long inLen = s_height * (1 + s_width * colorChannel), tmpLen = 0; // input len of scanlines datas
unsigned char *scanlines = generateScanlines(pixels, s_width, s_height, colorChannel); // generating scanlines from the pixels
unsigned char *deflatedDatas = NULL; // setting up the deflated datas output
int result = 0;
// initialising zlib
z_stream defstream;
defstream.zalloc = Z_NULL;
defstream.zfree = Z_NULL;
defstream.opaque = Z_NULL;
defstream.avail_in = inLen;
defstream.next_in = (Bytef *)scanlines;
defstream.avail_out = 0;
defstream.next_out = (Bytef *)deflatedDatas;
if ((result = deflateInit(&defstream, Z_DEFAULT_COMPRESSION)) == Z_OK)
{
// calculate the actual length and update zlib structure
unsigned long estimateLen = deflateBound(&defstream, inLen);
deflatedDatas = (unsigned char *)malloc(estimateLen);
if (deflatedDatas != NULL)
{
// updation zlib configuration
defstream.avail_out = (uInt)estimateLen;
defstream.next_out = (Bytef *)deflatedDatas;
// do the compression
deflate(&defstream, Z_FINISH);
tmpLen = (unsigned char *)defstream.next_out - deflatedDatas;
}
}
deflateEnd(&defstream); // end of deflating algorithm
*deflatedLen = tmpLen; // copying the defalted data length to the IDAT->length
free(scanlines);
return deflatedDatas;
}
the bottom left to top left pixelbuffer flipping code :
// Flips the image vertically in place: row 0 is exchanged with the last row, row 1
// with the second-to-last, and so on. Each row is s_width * colorChannel bytes.
// The swap is done byte by byte, so no temporary buffer is allocated and the
// function cannot fail. Does nothing for a NULL buffer, non-positive width or
// channel count, or fewer than two rows.
void flipPixels(unsigned char *pixelsArray, int s_width, int s_heigth, int colorChannel)
{
    if (pixelsArray == NULL || s_width <= 0 || s_heigth < 2 || colorChannel <= 0)
        return;

    size_t oneLineLength = (size_t)s_width * (size_t)colorChannel;
    unsigned char *top = pixelsArray;
    unsigned char *bottom = pixelsArray + (size_t)(s_heigth - 1) * oneLineLength;

    // Walk the two row pointers towards each other; with an odd row count the
    // middle row is left where it is.
    while (top < bottom) {
        for (size_t k = 0; k < oneLineLength; k++) {
            unsigned char saved = top[k];
            top[k] = bottom[k];
            bottom[k] = saved;
        }
        top += oneLineLength;
        bottom -= oneLineLength;
    }
}
[1]:
https://i.stack.imgur.com/5khCg.png
We don't see all the relevant code, but first, you seem to be deliberately corrupting your image data by reversing the bytes. Don't do that. Remove the call to invertArray().
Second, you are not returning the size of the compressed data. You are returning the upper-bound estimate of that.
Use deflateBound() (after calling deflateInit()), not compressBound(). Call deflateBound() once (instead of three times) and save the answer in unsigned long bound. Return the size of the compressed data, which is bound - defstream.avail_out.
Lastly, you are not compressing! The second argument to deflateInit() should be Z_DEFAULT_COMPRESSION or something else other than 0, which means no compression.
Your example has an incorrect CRC for the IDAT chunk, so there's something else wrong in the code you don't show for constructing that chunk.
Even if I undo the invertArray() (which itself has another error), what I get is still messed-up image that appears to have a byte deleted on each row.
We can't help you unless you provide all of your code. There are many errors, and you don't know where they are.

implement the recognition of mnist datasets, the Segmentation fault (core dumped) is displayed

I want to use C language to implement the recognition of mnist datasets, using a backpropagation algorithm, but when loading the input layer neural units, the Segmentation fault (core dumped) is displayed, here's the code snippet, why, and how to solve it.
#include <stdio.h>
#include <unistd.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#define PATH_TRAIN_IMAGES "../../train-images-idx3-ubyte"
#define PATH_TRAIN_LABELS "../../train-labels-idx1-ubyte"
#define PATH_WEIGHT_DATA2 "../data/data2.weight"
#define PATH_WEIGHT_DATA3 "../data/data3.weight"
#define PATH_BIAS_DATA2 "../data/data2.bias"
#define PATH_BIAS_DATA3 "../data/data3.bias"
#define TRAIN_IMAGES_NUMBER 60000
#define PIXEL 784
#define HIDDEN_UNITS_NUMBER 300
#define OUT_UNITS_NUMBER 10
#define TRAIN_TEST 0
// One unit (neuron) of the network. main() allocates one per input pixel, per
// hidden unit and per output unit via create_unit().
struct Unit
{
// input with weight
float z;
// bias
float b;
// output
float a;
};
float sigmod(float z)
{
return (1 / (1 + exp(-z)));
}
// Allocates one Unit on the heap and initialises its fields.
//   uz  initial weighted input (z)
//   ub  initial bias (b)
//   ua  initial output (a)
// Returns the new unit, which the caller must free(), or NULL if the allocation
// fails (previously a failed allocation was dereferenced).
struct Unit* create_unit(float uz, float ub, float ua)
{
    struct Unit *unit = (struct Unit *)malloc(sizeof *unit);
    if (unit == NULL)
        return NULL;

    unit->z = uz;
    unit->b = ub;
    unit->a = ua;
    return unit;
}
// Opens the label file, measures its size by seeking to the end, skips the first
// 8 bytes and reads the remaining bytes into a newly malloc'ed buffer.
// Returns the total file size in bytes (including the 8 skipped bytes).
// NOTE(review): ar_label is received by value, so the address assigned to it below
// never reaches the caller; the caller's pointer keeps its previous value and the
// buffer is lost when the function returns (see the answer below).
int load_train_labels(char* path_train_labels, unsigned char* ar_label)
{
FILE *fp_label;
int size_label = 0;
// NOTE(review): the fopen result is used without a NULL check.
fp_label = fopen(path_train_labels, "rb");
fseek(fp_label, 0, SEEK_END);
size_label = ftell(fp_label);
printf("%s size:%d byte\n", path_train_labels, size_label);
rewind(fp_label);
// Starting with the 9th byte
fseek(fp_label,8,SEEK_SET);
// NOTE(review): this variable-length array of size_label bytes is never used.
unsigned char train_labels_buffer[size_label];
// Parses as (sizeof(unsigned char) * size_label) - 8, i.e. size_label - 8 bytes.
ar_label = (unsigned char*)malloc(sizeof(unsigned char) * size_label - 8);
fread(ar_label, 1, size_label - 8, fp_label);
fclose(fp_label);
return size_label;
}
// Opens the image file, measures its size by seeking to the end, skips the first
// 16 bytes and reads the remaining bytes into a newly malloc'ed buffer.
// Returns the total file size in bytes (including the 16 skipped bytes).
// NOTE(review): ar_img is received by value, so the address assigned to it below
// never reaches the caller (see the answer below).
int load_train_images(char* path_train_images, unsigned char* ar_img)
{
FILE *fp_img;
int size_img = 0;
// NOTE(review): the fopen result is used without a NULL check.
fp_img = fopen(path_train_images, "rb");
fseek(fp_img, 0, SEEK_END);
size_img = ftell(fp_img);
printf("%s size:%d byte\n", path_train_images, size_img);
rewind(fp_img);
// Starting with the 17th byte, each byte stores the value of one pixel in a picture
fseek(fp_img, 16, SEEK_SET);
// Parses as (sizeof(char) * size_img) - 16, i.e. size_img - 16 bytes.
ar_img = (unsigned char*)malloc(sizeof(char) * size_img - 16);
fread(ar_img, 1, size_img - 16, fp_img);
fclose(fp_img);
return size_img;
}
// Opens a weight/bias file, measures its size by seeking to the end and allocates
// a buffer of that size. Returns the file size in bytes.
// NOTE(review): no fread follows the allocation, so the buffer is never filled, and
// the file is never closed.
// NOTE(review): ar_data is received by value, so the address assigned to it below
// never reaches the caller (see the answer below).
int load_data(char* path_data, unsigned char* ar_data)
{
FILE *fp_data;
int size_data;
// NOTE(review): the fopen result is used without a NULL check.
fp_data = fopen(path_data, "rb");
fseek(fp_data, 0, SEEK_END);
size_data = ftell(fp_data);
fseek(fp_data, 0, SEEK_SET);
ar_data = (unsigned char*)malloc(sizeof(char) * size_data);
printf("%s size:%d byte\n", path_data, size_data);
return size_data;
}
// Loads the label, image, weight and bias files, creates the units of the three
// layers, then runs one forward pass (input -> hidden -> output) on the first image.
// NOTE(review): every ar_* pointer below is passed to its loader by value and is
// never assigned in this function, so each one is still uninitialized when it is
// dereferenced or freed further down.
int main(int argc, char *argv[])
{
printf("Loading train labels file.\n");
unsigned char* ar_label;
int size_label;
size_label = load_train_labels(PATH_TRAIN_LABELS, ar_label);
printf("Loading train images file.\n");
unsigned char* ar_img;
int size_img;
size_img = load_train_images(PATH_TRAIN_IMAGES, ar_img);
printf("Loading random weight file.\n");
unsigned char* ar_weight2;
int size_weight2;
size_weight2 = load_data(PATH_WEIGHT_DATA2, ar_weight2);
unsigned char* ar_weight3;
int size_weight3;
size_weight3 = load_data(PATH_WEIGHT_DATA3, ar_weight3);
printf("Loading random bias file.\n");
unsigned char* ar_bias2;
int size_bias2;
size_bias2 = load_data(PATH_BIAS_DATA2, ar_bias2);
unsigned char* ar_bias3;
int size_bias3;
size_bias3 = load_data(PATH_BIAS_DATA3, ar_bias3);
// Every unit starts with z, b and a all set to 0.
float uz = 0;
float ub = 0;
float ua = 0;
struct Unit* out_units[OUT_UNITS_NUMBER];
for (int t = 0; t < OUT_UNITS_NUMBER; t++)
{
out_units[t] = create_unit(uz, ub, ua);
}
struct Unit* hid_units[HIDDEN_UNITS_NUMBER];
for(int i = 0; i < HIDDEN_UNITS_NUMBER; i++)
{
hid_units[i] = create_unit(uz, ub, ua);
}
struct Unit* in_units[PIXEL] = {NULL};
for(int i = 0; i < PIXEL; i++)
{
in_units[i] = create_unit(uz, ub, ua);
}
/*******************
* load C1 *
*******************/
printf("Loading train...\n");
// NOTE(review): C holds TRAIN_IMAGES_NUMBER floats on the stack and is not used
// anywhere in this function.
float C[TRAIN_IMAGES_NUMBER];
// Input layer: the output of unit i is the value of byte i of the image buffer.
for(int i = 0; i < PIXEL; i++)
{
in_units[i]->a = (float)*((ar_img+i*sizeof(char))); //segmentation fault(core dumped)
printf("in_unit[%d] = %f\n", i, in_units[i]->a);
}
// Hidden layer: weighted sum of all inputs plus bias, then the sigmoid.
// NOTE(review): ar_weight2 / ar_bias2 are unsigned char pointers, so each
// dereference reads a single byte at a sizeof(float)-scaled offset and converts
// that byte to float; it does not read a stored 4-byte float.
for(int i = 0; i < HIDDEN_UNITS_NUMBER; i++)
{
for(int j = 0; j < PIXEL; j++)
{
hid_units[i]->z += in_units[j]->a * ((float)*(ar_weight2+((i*PIXEL+j)*sizeof(float))));
}
hid_units[i]->z += ((float)*(ar_bias2+(i*sizeof(float))));
hid_units[i]->a = sigmod(hid_units[i]->z);
}
// Output layer: same computation over the hidden outputs (same single-byte reads).
for(int i = 0; i < OUT_UNITS_NUMBER; i++)
{
for(int j = 0; j < HIDDEN_UNITS_NUMBER; j++)
{
out_units[i]->z += hid_units[j]->a * ((float)*(ar_weight3+((i*HIDDEN_UNITS_NUMBER+j)*sizeof(float))));
}
out_units[i]->z += ((float)*(ar_bias3 + (i*sizeof(float))));
out_units[i]->a = sigmod(out_units[i]->z);
}
// NOTE(review): the units created by create_unit() are never freed.
// free(in_units)
free(ar_label);
free(ar_img);
free(ar_weight2);
free(ar_bias2);
free(ar_weight3);
free(ar_bias3);
return 0;
}
Almost all source code was uploaded. I used the gdb debugger, but only showed Program terminated with signal SIGSEGV, Segmentation fault.And I turned on ulimit, but didn't find the core file.
Your pointer-passing is flawed. When calling a function and passing that function a pointer to something, you can alter the data the pointer is pointing to, but not the address of the pointer itself so that it is visible from the caller perspective (only in the callees perspective).
For example, one of your functions signature reads:
int load_data(char* path_data, unsigned char* ar_data)
In that function, you do a
ar_data = (unsigned char*)malloc(sizeof(char) * size_data);
This is fine in itself, but it has no effect on the caller of load_data: the caller cannot access the allocated memory. Instead, the memory address is lost as soon as the function returns.
This means, that when you write
unsigned char* ar_label;
int size_label;
size_label = load_train_labels(PATH_TRAIN_LABELS, ar_label);
then after calling the function, ar_label still has its original (uninitialized) value. What you probably meant to do was to write the function signature as (notice the extra asterisks/ampersands in the following):
int load_data(char* path_data, unsigned char** ar_data)
Then, allocate the memory as:
*ar_data = (unsigned char*)malloc(sizeof(char) * size_data);
and use the function as:
unsigned char* ar_label;
int size_label;
size_label = load_train_labels(PATH_TRAIN_LABELS, &ar_label);
This way, you are passing a pointer to a pointer and therefore can alter the address the pointer ar_label points to in the caller. This means, that this way you store the address of the mallocated memory block in the callers pointer variable instead of in a copy of the pointer variable supplied as parameter. And you therefore are allowed to access the memory this pointer points to in the caller afterwards.

Mono WAV playing on both channels

I created a random mono WAV, but when I play it, I can hear the audio through both channels (left & right). Here's my code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
// Complete WAV file header as main() writes it with fwrite(&wavHdr, 44, 1, fp).
// NOTE(review): writing exactly 44 bytes assumes 4-byte unsigned int, 2-byte
// unsigned short and no padding between the fields — confirm for the target compiler.
struct wav{
// main() stores "RIFF" here.
char ChunkID[4];
// main() sets this to Subchunk2Size + 36.
unsigned int ChunkSize;
// main() stores "WAVE" here.
char Format[4];
// main() stores "fmt " here.
char Subchunk1ID[4];
// main() sets this to 16.
unsigned int Subchunk1Size;
// main() sets this to 1.
unsigned short int AudioFormat;
// main() sets this to 1 (mono).
unsigned short int NumChannels;
unsigned int SampleRate;
// SampleRate*NumChannels*BitsPerSample/8, per the comment in main().
unsigned int ByteRate;
// NumChannels*BitsPerSample/8, per the comment in main().
unsigned short int BlockAlign;
// main() sets this to 16.
unsigned short int BitsPerSample;
// main() stores "data" here.
char SubChunk2ID[4];
// Size of the sample data in bytes; main() sets it to 5 * ByteRate.
unsigned int Subchunk2Size;
};
// Writes "MonoSound.wav": a 44-byte header followed by 5 * SampleRate samples of a
// 50 Hz sine, each written as 2 bytes, on a single channel.
// NOTE(review): strcpy is used but <string.h> is not among the includes above.
int main(){
struct wav wavHdr;
FILE *fp;
// NOTE(review): the fopen result is not checked and the file is never closed.
fp = fopen("MonoSound.wav", "wb");
// NOTE(review): each strcpy writes 5 bytes (tag + NUL) into a 4-byte field; the NUL
// lands in the following field and is only harmless because that field is
// assigned afterwards.
strcpy(wavHdr.ChunkID, "RIFF");
strcpy(wavHdr.Format, "WAVE");
strcpy(wavHdr.Subchunk1ID, "fmt ");
wavHdr.Subchunk1Size = 16;
wavHdr.AudioFormat = 1;
wavHdr.NumChannels = 1;
wavHdr.SampleRate = 220505;
wavHdr.ByteRate = 441010; //(SampleRate*NumChannels*BitsPerSample/8)
wavHdr.BlockAlign = 2; //(NumChannels*BitsPerSample/8)
wavHdr.BitsPerSample = 16;
strcpy(wavHdr.SubChunk2ID, "data");
/* multiplied by 5 because there's 5 seconds of audio */
wavHdr.Subchunk2Size = (5 * wavHdr.ByteRate);
wavHdr.ChunkSize = (wavHdr.Subchunk2Size + 36);
fwrite(&wavHdr, 44, 1, fp);
int i, randVal;
unsigned int audio;
float freq = 50.0;
int amp = 32600;
float w;
srand(time(NULL));
for(i = 0; i < (5 * wavHdr.SampleRate); i++){
// rand() % 1 is always 0, so randVal is always 1 and amp grows by 1 per sample.
randVal = (rand() % 1) + 1;
amp += randVal;
// Angular frequency 2*pi*freq.
w = 2.0 * 3.141592 * freq;
// NOTE(review): the sine is negative half of the time, but audio is unsigned int.
audio = amp * sin(w * i / 220505.0);
// Writes the first 2 bytes of the 4-byte audio variable.
fwrite(&audio, 2, 1, fp);
}
return 0;
}
What have I done wrong here? The audio should only come out through one of the speakers. Thanks in advance for the help.
"The audio should only come out through one of the speakers"
Not really. When you have mono file i.e. you had one microphone when you were recording the audio, you will get same data on both output channels. If you want to hear audio only from one channel make 2 channel wav, with one channel all zeros
The audio should only come out through one of the speakers
Why do you you think so? Probably the audio driver tries to be smart and plays mono signals through both speakers (like all other consumer audio hardware does).
If you want to be sure that a signal is played on the left channel only, you have to create a stereo signal with the right channel set to silence (all zeros).
To achieve your goal you can either trick your audio card (which by default plays the mono file through both speaker channels), or you can create a stereo file with one empty channel.
In order to do so you have to change the number of channels (set wavHdr.NumChannels to 2) and you have to write the empty channel alternating with the good one (see the second fwrite call at the end of the code).
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <cstring>
// Complete WAV file header as main() writes it with fwrite(&wavHdr, 44, 1, fp).
// NOTE(review): writing exactly 44 bytes assumes 4-byte unsigned int, 2-byte
// unsigned short and no padding between the fields — confirm for the target compiler.
struct wav{
// main() stores "RIFF" here.
char ChunkID[4];
// main() sets this to Subchunk2Size + 36.
unsigned int ChunkSize;
// main() stores "WAVE" here.
char Format[4];
// main() stores "fmt " here.
char Subchunk1ID[4];
// main() sets this to 16.
unsigned int Subchunk1Size;
// main() sets this to 1.
unsigned short int AudioFormat;
// main() sets this to 2 (stereo).
unsigned short int NumChannels;
unsigned int SampleRate;
// main() computes SampleRate * NumChannels * BitsPerSample/8.
unsigned int ByteRate;
// main() computes NumChannels * BitsPerSample/8.
unsigned short int BlockAlign;
// main() sets this to 16.
unsigned short int BitsPerSample;
// main() stores "data" here.
char SubChunk2ID[4];
// Size of the sample data in bytes; main() sets it to 5 * ByteRate.
unsigned int Subchunk2Size;
};
/* Writes "MonoSound.wav": a 44-byte header followed by five seconds of 16-bit
 * stereo audio in which the left channel carries a 50 Hz sine and the right
 * channel is silent.
 * Returns 0 on success and 1 if the file cannot be opened or written. */
int main(){
    struct wav wavHdr;
    FILE *fp = fopen("MonoSound.wav", "wb");
    if (fp == NULL) {
        perror("MonoSound.wav");
        return 1;
    }

    /* The tag fields are exactly 4 bytes wide with no room for a terminator, so
     * they are filled with memcpy; strcpy would also write a NUL into the field
     * that follows. */
    memcpy(wavHdr.ChunkID, "RIFF", 4);
    memcpy(wavHdr.Format, "WAVE", 4);
    memcpy(wavHdr.Subchunk1ID, "fmt ", 4);
    wavHdr.Subchunk1Size = 16;
    wavHdr.AudioFormat = 1;
    wavHdr.NumChannels = 2;
    wavHdr.BitsPerSample = 16;
    wavHdr.SampleRate = 220505;
    wavHdr.ByteRate = wavHdr.SampleRate * wavHdr.NumChannels * wavHdr.BitsPerSample/8;
    wavHdr.BlockAlign = wavHdr.NumChannels * wavHdr.BitsPerSample/8;
    memcpy(wavHdr.SubChunk2ID, "data", 4);
    /* multiplied by 5 because there's 5 seconds of audio */
    wavHdr.Subchunk2Size = (5 * wavHdr.ByteRate);
    wavHdr.ChunkSize = (wavHdr.Subchunk2Size + 36);
    fwrite(&wavHdr, 44, 1, fp);

    const double freq = 50.0;
    const double w = 2.0 * 3.141592 * freq;   /* angular frequency */
    const short empty = 0;                    /* one silent 16-bit sample */
    const unsigned int totalFrames = 5 * wavHdr.SampleRate;
    int amp = 32600;

    srand((unsigned int)time(NULL));
    for (unsigned int i = 0; i < totalFrames; i++) {
        /* rand() % 1 is always 0, so this adds 1 to the amplitude per frame. */
        int randVal = (rand() % 1) + 1;
        amp += randVal;

        /* Compute in double, then clamp to the 16-bit range before narrowing:
         * the sine is negative half of the time and amp soon exceeds 32767. */
        double value = amp * sin(w * i / wavHdr.SampleRate);
        if (value > 32767.0) {
            value = 32767.0;
        } else if (value < -32768.0) {
            value = -32768.0;
        }
        short audio = (short)value;

        // write LEFT channel
        fwrite(&audio, sizeof audio, 1, fp);
        // write RIGHT channel
        fwrite(&empty, sizeof empty, 1, fp);
    }

    /* Report buffered write errors as well: fclose flushes the stream. */
    int failed = ferror(fp);
    if (fclose(fp) != 0) {
        failed = 1;
    }
    return failed ? 1 : 0;
}
The order in which you write the two channel matters. If you want the empty channel to be the left one, you have to invert the two fwrite commands.
Moreover, you have to change ByteRate and BlockAlign to take into account the new channel.

Negative array indexing in shared memory based 1d stencil CUDA implementation

I'm currently working with CUDA programming and I'm trying to learn off of slides from a workshop I found online, which can be found here. The problem I am having is on slide 48. The following code can be found there:
__global__ void stencil_1d(int *in, int *out) {
__shared__ int temp[BLOCK_SIZE + 2 * RADIUS];
int gindex = threadIdx.x + blockIdx.x * blockDim.x;
int lindex = threadIdx.x + RADIUS;
// Read input elements into shared memory
temp[lindex] = in[gindex];
if (threadIdx.x < RADIUS) {
temp[lindex - RADIUS] = in[gindex - RADIUS];
temp[lindex + BLOCK_SIZE] = in[gindex + BLOCK_SIZE];
}
....
To add a bit of context: we have an array called in, which has length, say, N. We then have another array, out, which has length N+(2*RADIUS), where RADIUS has a value of 3 for this particular example. The idea is to copy array in into array out, but to place in at position 3 from the beginning of out, i.e. out = [RADIUS][in][RADIUS]; see the slide for a graphical representation.
The confusion comes in on the following line:
temp[lindex - RADIUS] = in[gindex - RADIUS];
If gindex is 0 then we have in[-3]. How can we read from a negative index in an array? Any help would really be appreciated.
The answer by pQB is correct. You are supposed to offset the input array pointer by RADIUS.
To show this, I'm providing below a full worked example. Hope it would be beneficial to other users.
(I would say you would need a __syncthreads() after the shared memory loads. I have added it in the below example).
#include <thrust/device_vector.h>
#define RADIUS 3
#define BLOCKSIZE 32
/*******************/
/* iDivUp FUNCTION */
/*******************/
// Integer division that rounds up on a remainder: a / b, plus one when a % b is non-zero.
int iDivUp(int a, int b)
{
    int quotient = a / b;
    if ((a % b) != 0) {
        quotient = quotient + 1;
    }
    return quotient;
}
/********************/
/* CUDA ERROR CHECK */
/********************/
// Wraps a CUDA runtime call: passes its result to gpuAssert together with the
// source file and line of the call site.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
// Prints the CUDA error string with file and line to stderr when code is not
// cudaSuccess, and then exits with that code unless abort is false.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
/**********/
/* KERNEL */
/**********/
// For each global index, sums the 2*RADIUS + 1 input elements centred on it, using a
// per-block shared-memory tile of BLOCKSIZE elements plus RADIUS halo elements on
// each side. Elements that fail the range tests are loaded as 0.
// Assumes the kernel is launched with BLOCKSIZE threads per block (the tile size
// and the right-halo offset are both expressed with BLOCKSIZE).
__global__ void moving_average(unsigned int *in, unsigned int *out, unsigned int N) {
__shared__ unsigned int temp[BLOCKSIZE + 2 * RADIUS];
unsigned int gindexx = threadIdx.x + blockIdx.x * blockDim.x;
// Index into temp, shifted right by the left halo.
unsigned int lindexx = threadIdx.x + RADIUS;
// --- Read input elements into shared memory
temp[lindexx] = (gindexx < N)? in[gindexx] : 0;
// The first RADIUS threads of the block additionally load the two halos.
if (threadIdx.x < RADIUS) {
// NOTE(review): gindexx is unsigned, so (gindexx - RADIUS) >= 0 is always true;
// for gindexx < RADIUS the subtraction wraps around to a very large index.
temp[threadIdx.x] = (((gindexx - RADIUS) >= 0)&&(gindexx <= N)) ? in[gindexx - RADIUS] : 0;
temp[threadIdx.x + (RADIUS + BLOCKSIZE)] = ((gindexx + BLOCKSIZE) < N)? in[gindexx + BLOCKSIZE] : 0;
}
// All loads into temp must be complete before any thread reads its neighbours.
__syncthreads();
// --- Apply the stencil
unsigned int result = 0;
for (int offset = -RADIUS ; offset <= RADIUS ; offset++) {
result += temp[lindexx + offset];
}
// --- Store the result
// NOTE(review): the store is not guarded by gindexx < N.
out[gindexx] = result;
}
/********/
/* MAIN */
/********/
// Test driver: fills a device vector of N elements with a constant, runs the
// moving_average kernel over it and prints every output element.
int main() {
const unsigned int N = 55 + 2 * RADIUS;
const unsigned int constant = 4;
thrust::device_vector<unsigned int> d_in(N, constant);
thrust::device_vector<unsigned int> d_out(N);
// iDivUp(N, BLOCKSIZE) blocks of BLOCKSIZE threads: enough threads to cover all N elements.
moving_average<<<iDivUp(N, BLOCKSIZE), BLOCKSIZE>>>(thrust::raw_pointer_cast(d_in.data()), thrust::raw_pointer_cast(d_out.data()), N);
// Check the launch itself, then wait for the kernel and check its execution.
gpuErrchk(cudaPeekAtLastError());
gpuErrchk(cudaDeviceSynchronize());
// Copy the result back to the host.
thrust::host_vector<unsigned int> h_out = d_out;
for (int i=0; i<N; i++)
printf("Element i = %i; h_out = %i\n", i, h_out[i]);
return 0;
}
You are assuming that in array points to the first position of the memory that has been allocated for this array. However, if you see slide 47, the in array has a halo (orange boxes) of three elements before and after of the data (represented as green cubes).
My assumption is (I have not done the workshop) that the input array is first initialized with an halo and then the pointer is moved in the kernel call. Something like:
stencil_1d<<<dimGrid, dimBlock>>>(in + RADIUS, out);
So, in the kernel, it's safe to do in[-3] because the pointer is not at the beginning of the array.
There are already good answers, but to focus on the actual point that caused the confusion:
In C (not only in CUDA, but in C in general), when you access an "array" by using the [ brackets ], you are actually doing pointer arithmetic.
For example, consider a pointer like this:
int* data= ... // Points to some memory
When you then write a statement like
data[3] = 42;
you are just accessing a memory location that is "three entries behind the original data pointer". So you could also have written
int* data= ... // Points to some memory
int* dataWithOffset = data+3;
dataWithOffset[0] = 42; // This will write into data[3]
and consequently,
dataWithOffset[-3] = 123; // This will write into data[0]
In fact, you can say that data[i] is the same as *(data+i), which is the same as *(i+data), which in turn is the same as i[data], but you should not use this in real programs...)
I can compile @JackOLantern's code, but there is a warning: "pointless comparison of unsigned integer with zero":
And when run, it will abort like:
I have modified the code to the following and the warning disappeared and it can get right result:
#include <thrust/device_vector.h>
#define RADIUS 3
#define BLOCKSIZE 32
/*******************/
/* iDivUp FUNCTION */
/*******************/
// Ceiling-style integer division: the quotient a / b, incremented by one whenever
// the division leaves a remainder.
int iDivUp(int a, int b)
{
    const int hasRemainder = ((a % b) != 0) ? 1 : 0;
    return a / b + hasRemainder;
}
/********************/
/* CUDA ERROR CHECK */
/********************/
// Wraps a CUDA runtime call: passes its result to gpuAssert together with the
// source file and line of the call site. Written as do { } while (0) so that
// "gpuErrchk(x);" behaves as a single statement, including inside an unbraced
// if/else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)
// Prints the CUDA error string with file and line to stderr when code is not
// cudaSuccess, and then exits with that code unless abort is false.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code == cudaSuccess)
        return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort)
        exit(code);
}
/**********/
/* KERNEL */
/**********/
// Moving-sum stencil: out[i] = sum of in[i - RADIUS] .. in[i + RADIUS] for every
// i in [0, N), where input elements outside [0, N) count as 0.
//
// Each block stages its BLOCKSIZE elements plus RADIUS halo elements on each side
// in shared memory. Assumes the kernel is launched with BLOCKSIZE threads per
// block (the tile size and the right-halo offset are both expressed with BLOCKSIZE).
//   in   N input elements
//   out  N output elements
//   N    number of elements
__global__ void moving_average(unsigned int *in, unsigned int *out, int N) {
    __shared__ unsigned int temp[BLOCKSIZE + 2 * RADIUS];
    int gindexx = threadIdx.x + blockIdx.x * blockDim.x;
    int lindexx = threadIdx.x + RADIUS;   // position in temp, past the left halo

    // --- Read input elements into shared memory
    temp[lindexx] = (gindexx < N) ? in[gindexx] : 0;
    if (threadIdx.x < RADIUS) {
        // Left halo: valid whenever the shifted index itself lies in [0, N).
        // (Testing gindexx <= N instead dropped a valid element when the last
        // block holds a single element.)
        int left = gindexx - RADIUS;
        temp[threadIdx.x] = (left >= 0 && left < N) ? in[left] : 0;

        // Right halo: the RADIUS elements following this block's tile.
        int right = gindexx + BLOCKSIZE;
        temp[threadIdx.x + (RADIUS + BLOCKSIZE)] = (right < N) ? in[right] : 0;
    }
    // Every thread of the block must reach this barrier, so the range check on
    // gindexx only happens after it.
    __syncthreads();

    // Threads past the end of the data helped with loading but own no output element.
    if (gindexx >= N)
        return;

    // --- Apply the stencil
    unsigned int result = 0;
    for (int offset = -RADIUS ; offset <= RADIUS ; offset++) {
        result += temp[lindexx + offset];
    }
    // --- Store the result
    out[gindexx] = result;
}
/********/
/* MAIN */
/********/
// Test driver: fills a device vector of N elements with a constant, runs the
// moving_average kernel over it and prints every output element.
int main() {
// N is a signed int here, matching the kernel's int N parameter.
const int N = 55 + 2 * RADIUS;
const unsigned int constant = 4;
thrust::device_vector<unsigned int> d_in(N, constant);
thrust::device_vector<unsigned int> d_out(N);
// iDivUp(N, BLOCKSIZE) blocks of BLOCKSIZE threads: enough threads to cover all N elements.
moving_average<<<iDivUp(N, BLOCKSIZE), BLOCKSIZE>>>(thrust::raw_pointer_cast(d_in.data()), thrust::raw_pointer_cast(d_out.data()), N);
// Check the launch itself, then wait for the kernel and check its execution.
gpuErrchk(cudaPeekAtLastError());
gpuErrchk(cudaDeviceSynchronize());
// Copy the result back to the host.
thrust::host_vector<unsigned int> h_out = d_out;
for (int i=0; i<N; i++)
printf("Element i = %i; h_out = %i\n", i, h_out[i]);
return 0;
}
The result is like this:

How to perform a function on an array of data in C

Im trying to perform a function on an array of data but Im not quite sure how to go about it,
here is my code
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
// Number of samples in the test buffers below.
#define BUFFER_LEN 10
// Sample rate; flanger() divides it by 1000 when computing buffer positions.
#define SAMPLE_RATE 48000
// flanger() uses (SAMPLE_RATE/1000)*MAX_DELAY as the highest delay-buffer position.
// NOTE(review): presumably a delay in milliseconds — confirm.
#define MAX_DELAY 0.25
// Test input: ten consecutive sample values.
int buffer_in[]= {0,1,2,3,4,5,6,7,8,9};
// Receives one flanger() result per input sample.
int buffer_out[10];
// Parameters in order: range, delay, rate, inData (see the definition below).
short int flanger(float , float , int , short int );
// Feeds each element of buffer_in through flanger() and prints the result.
int main(void)
{
// NOTE(review): k and l are declared but not used.
int j,k,l;
// NOTE(review): "<=" makes this loop run BUFFER_LEN + 1 times, one more than the
// 10 elements that buffer_in and buffer_out hold.
for (j = 0; j <=BUFFER_LEN; j++){
// The prototype above declares four parameters (range, delay, rate, inData);
// only one argument is passed here.
buffer_out[j] = flanger(buffer_in[j]); //this is causing the error
printf("buffer out value = %d",buffer_out[j]);
}
return 0;
}
// Flanger function
/*
 * Processes one audio sample through a swept delay line and returns the delayed
 * sample (the average of two neighbouring slots when the delay is not a whole
 * number of samples).
 *
 *   range   upper limit of the sweep value added to the base delay
 *   delay   base delay added to the sweep value
 *   rate    number of calls between two steps of the sweep
 *   inData  newest input sample
 *
 * The function keeps its sweep state, buffer positions and the delay line itself
 * in static storage, so it is stateful across calls and not reentrant.
 */
short int flanger(float range, float delay, int rate, short int inData){
    enum { DELAY_SLOTS = 50 };  /* capacity of the delay line */
    float flangerDelay; /* stores current delay required for flange effect */
    static int i=0; /* keeps track of time for creating sweep waveform */
    static float sweepValue=0; /* keeps track of current sweep delay in ms */
    static int sweepFlag=1; /* keeps track of waveform movement */
    static int writePtr=0; /* pointer to newest audio sample in buffer */
    static int readPtr=0; /* pointer to oldest audio sample in buffer */
    float tmp; /* tmp value to see if dly will point to a position in buff */
    /* static: the delay line has to survive between calls, otherwise earlier
     * samples are lost and uninitialised memory is read. Starts zeroed (silence). */
    static float delayArray[DELAY_SLOTS];

    /* is it time to change waveform? if not, increment counter */
    if (i >= rate) {
        /* has the maximum possible delay for sweep been reached? */
        if (sweepValue >= range)
            sweepFlag = 0; /* start the \ of triangular waveform */
        else if (sweepValue <= 0)
            sweepFlag = 1; /* start the / of triangular waveform */
        /* Is the waveform rising or falling? */
        if (sweepFlag==1)
            sweepValue += 0.001; /* increase sweep delay by .001 ms */
        else
            sweepValue -= 0.001; /* decrease sweep delay by .001 ms */
        /* reset i, to start count before waveform changes shape again */
        i=0;
    }
    else i++;

    /* Calculate the total current to delay (in ms, not samples!) */
    flangerDelay = sweepValue + delay;
    /* calculate delay in samples rather than in time */
    /* NOTE(review): the factor is hard-coded to 22.4 although SAMPLE_RATE/1000 is 48. */
    tmp = flangerDelay * 22.4f;

    /* Calculate position of the read & write pointers */
    if (writePtr < (int)tmp )
        readPtr = (((SAMPLE_RATE/1000)*MAX_DELAY) - ((int)tmp - writePtr));
    else
        readPtr = writePtr - (int)tmp;
    /* Defensive clamp: a delay longer than the buffer can hold (or a negative one)
     * would otherwise index outside delayArray. The upper limit leaves room for
     * the readPtr + 1 access used by the interpolation below. */
    if (readPtr < 0)
        readPtr = 0;
    else if (readPtr > DELAY_SLOTS - 2)
        readPtr = DELAY_SLOTS - 2;

    /* has the write pointer reached end of delay buffer? */
    if (writePtr > ((SAMPLE_RATE/1000)*MAX_DELAY) || writePtr >= DELAY_SLOTS - 1)
        writePtr=0;
    else
        writePtr++;

    /* now add current audio sample to array and return oldest sample */
    delayArray[writePtr] = inData;

    /* is tmp a whole value? i.e. will it point to a sample in the buffer? */
    if (tmp > (int)tmp){
        /* not a whole number! therefore, interpolation is required! */
        if (readPtr == ((SAMPLE_RATE/1000) * MAX_DELAY))
            return((delayArray[readPtr] + delayArray[0])/2);
        else
            return((delayArray[readPtr] + delayArray[readPtr+1])/2);
    }
    else {
        /* is a whole number! therefore, can take straight from buffer! */
        return delayArray[readPtr];
    }
}
I get the error at this part:
for (j = 0; j <=BUFFER_LEN; j++){
buffer_out[j] = flanger(buffer_in[j]); //this is causing the error
printf("buffer out value = %d",buffer_out[j]);
}
Basically, I want to perform the function on the data in buffer_in and put the result into buffer_out.
Have I got it set up wrong?
Many thanks for your help!
You declare the flanger function to take four arguments, but pass only one argument when calling it. You must call it with the correct number of arguments, or change the function to take only a single argument.
If you want to pass an array to flanger, you must declare it to accept an array:
short int flanger(int aBuffer[]){
Instead, you declared it to take 4 different parameters.
You need something like this. But it's hard to specify further as your code is too far from what you seem to want.
void flanger(int buf_out[], int buf_in[], float range, float delay, int rate, short int inData)
Maybe you just need:
buffer_out[j] = flanger(range_val, delay_val, rate_val,buffer_in[j])
Perhaps using an array of struct will serve you better? Something like: (should build as is in ANSI C)
#include <windows.h>
#include <ansi_c.h>
/* One set of flanger arguments bundled into a single record. */
typedef struct {
    float range;
    float delay;
    int rate;
    short inData;
} PARAMS;

/* Storage for ten parameter sets, and the pointer main() uses to fill and pass them. */
PARAMS params[10];
PARAMS *pParams;

/* Takes a pointer to one or more PARAMS records. */
void flanger(PARAMS *p);
/* Fills the ten global parameter sets with sample values and hands them to
   flanger(). Declared as int main(void), the portable form for a hosted C
   program, and returns 0 on completion. */
int main(void)
{
    int i;

    pParams = &params[0];
    for (i = 0; i < 10; i++) {
        pParams[i].range = 45.0 + (float)i;
        pParams[i].delay = .003 + (float)i;
        pParams[i].rate = 23 + i;
        pParams[i].inData = 1 + i;
    }
    flanger(pParams);
    return 0;
}
/* Placeholder: main() passes a pointer to the first element of params here;
   the actual processing is left to be filled in. */
void flanger(PARAMS *p)
{
//do something with params
}
[EDIT to show two struct * arguments]
This should build and run in an ANSI C compiler, just copy and paste:
#include <windows.h>
#include <ansi_c.h>
/* One set of flanger arguments bundled into a single record. */
typedef struct {
    float range;
    float delay;
    int rate;
    short inData;
} PARAMS;
/* Number of PARAMS records in each array processed by flanger(). */
#define DATESIZE 10
/* Reads DATESIZE records from `out` and writes the results to `in`
   (parameter names match the definition). */
void flanger(PARAMS *out, PARAMS *in);
/* Fills the `out` array with sample values and prints it, lets flanger()
   compute the `in` array from it, prints that as well, then waits for a key
   press. Declared as int main(void), the portable form for a hosted C
   program, and returns 0 on completion. */
int main(void)
{
    int i;
    PARAMS out[DATESIZE], *pOut, in[DATESIZE], *pIn;

    pOut = &out[0];
    pIn = &in[0];

    /* Populate and print the data handed to flanger(). */
    printf("Out Data\n");
    printf("Range Delay Rate InData\n");
    for (i = 0; i < DATESIZE; i++) {
        pOut[i].range = 45.0 + (float)i;
        pOut[i].delay = .003 + (float)i;
        pOut[i].rate = 23 + i;
        pOut[i].inData = 1 + i;
        printf("%4.2f %5.3f %d %d\n",
               pOut[i].range,
               pOut[i].delay,
               pOut[i].rate,
               pOut[i].inData);
    }

    flanger(pOut, pIn);

    /* Print the data flanger() wrote back. */
    printf("In Data\n");
    printf("Range Delay Rate InData\n");
    for (i = 0; i < DATESIZE; i++) {
        printf("%4.2f %5.3f %d %d\n",
               pIn[i].range,
               pIn[i].delay,
               pIn[i].rate,
               pIn[i].inData);
    }

    getchar(); /* keep the console open until a key is pressed */
    return 0;
}
/* Squares every field of the DATESIZE records in `out` and stores the results
   in the matching records of `in`.
   Plain multiplication is used instead of pow(x, 2.0): for the int and short
   fields it is exact, whereas a pow() result passes through double and is
   truncated on assignment. */
void flanger(PARAMS *out, PARAMS *in)
{
    int i;

    //process "out", pass back "in"
    for (i = 0; i < DATESIZE; i++) {
        in[i].range = out[i].range * out[i].range;
        in[i].delay = out[i].delay * out[i].delay;
        in[i].rate = out[i].rate * out[i].rate;
        in[i].inData = (short)(out[i].inData * out[i].inData);
    }
}

Resources