Recipe to copy 1D strided data with cudaMemcpy2D

Recipe to copy 1D strided data with cudaMemcpy2D - arrays

If one has two contiguous ranges of device memory, it is possible to copy memory from one to the other using cudaMemcpy.
double* source = ...
double* dest = ...
cudaMemcpy(dest, source, N, cudaMemcpyDeviceToDevice);
Now suppose that I want to copy source into dest, but every 2 or 3 elements respectively.
That is dest[0] = source[0], dest[3] = source[2], dest[6] = source[4], ....
Of course a single plain cudaMemcpy cannot do this.
Intuitively, cudaMemcpy2D should be able to do the job, because "strided elements can be seen as a column in a larger array".
But cudaMemcpy2D has many input parameters that are hard to interpret in this context, such as pitch.
For example, I managed to use cudaMemcpy2D to reproduce the case where both strides are 1.
cudaMemcpy2D(dest, 1, source, 1, 1, n*sizeof(T), cudaMemcpyDeviceToHost);
But I cannot figure out the general case, with dest_stride and source_stride different from 1.
Is there a way to copy strided data to strided data with cudaMemcpy2D?
In which order do I have to put the known information about the layout?, namely, in terms of the two strides and sizeof(T).
cudaMemcpy2D(dest, ??, source, ???, ????, ????, cudaMemcpyDeviceToHost);

Yes, this can be done. It is easier to illustrate in code than words so:
#include <iostream>
// Demonstrates a strided 1D copy expressed as a 2D copy with cudaMemcpy2D.
// Every "row" of the 2D copy is a single int: rows are spitch bytes apart in
// the source and dpitch bytes apart in the destination, so element k of the
// copy goes from source[k * swidth] to dest[k * dwidth].
// Returns 0 on success and 1 if any CUDA runtime call fails.
int main()
{
    // Source layout: 4 rows of 2 ints, i.e. a source stride of 2 elements.
    const size_t swidth = 2;
    const size_t sheight = 4;
    const size_t spitch = swidth * sizeof(int);
    int source[swidth * sheight] = { 0, 1, 2, 3, 4, 5, 6, 7 };

    // Destination layout: 4 rows of 3 ints, i.e. a destination stride of 3
    // elements. Pre-filled with -1 so untouched slots are visible in the output.
    const size_t dwidth = 3;
    const size_t dheight = 4;
    const size_t dpitch = dwidth * sizeof(int);
    int dest[dwidth * dheight] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };

    // Shape of the copy: one int per row (width is given in bytes), three rows.
    const size_t cwidth = 1 * sizeof(int);
    const size_t cheight = 3;

    int* source_d = nullptr;
    cudaError_t err = cudaMalloc(&source_d, spitch * sheight);
    if (err != cudaSuccess) {
        std::cerr << "cudaMalloc failed: " << cudaGetErrorString(err) << std::endl;
        return 1;
    }

    err = cudaMemcpy(source_d, &source[0], spitch * sheight, cudaMemcpyHostToDevice);
    if (err == cudaSuccess) {
        err = cudaMemcpy2D(&dest[0], dpitch, source_d, spitch, cwidth, cheight, cudaMemcpyDeviceToHost);
    }

    // Release the device buffer on every path before reporting the outcome.
    cudaFree(source_d);

    if (err != cudaSuccess) {
        std::cerr << "strided copy failed: " << cudaGetErrorString(err) << std::endl;
        return 1;
    }

    for (size_t i = 0; i < dwidth * dheight; i++) std::cout << i << " " << dest[i] << std::endl;
    return 0;
}
which does this:
$ nvcc -std=c++11 -arch=sm_52 -o strided_copy strided_copy.cu
$ cuda-memcheck ./strided_copy
========= CUDA-MEMCHECK
0 0
1 -1
2 -1
3 2
4 -1
5 -1
6 4
7 -1
8 -1
9 -1
10 -1
11 -1
========= ERROR SUMMARY: 0 errors
In essence, you are copying a width of 4 bytes (an int) with a stride of 8 bytes (two ints) into a destination with a stride of 12 bytes (three ints). I only copied three rows so that it is obvious how the row argument works. Adjust the size of the copy element and strides, etc. to taste.

A generic function for such a strided copy could look roughly like this:
// Copies numElements elements of elementSize bytes each from src to dst,
// reading one element every srcStride elements and writing one element every
// dstStride elements. Implemented as a cudaMemcpy2D call whose rows are one
// element wide.
//
//   dst, dstStride  destination base pointer and stride (in elements)
//   src, srcStride  source base pointer and stride (in elements)
//   numElements     number of elements to copy
//   elementSize     size of one element in bytes
//   kind            a cudaMemcpyKind value, passed as int
//
// Does nothing when any size or stride is not positive. The status returned by
// cudaMemcpy2D is not propagated because the function returns void.
void cudaMemcpyStrided(
        void *dst, int dstStride,
        void *src, int srcStride,
        int numElements, int elementSize, int kind) {
    // A non-positive count, element size or stride cannot be expressed as a
    // valid width/height/pitch, so there is nothing meaningful to copy.
    if (numElements <= 0 || elementSize <= 0 || dstStride <= 0 || srcStride <= 0) {
        return;
    }

    // Do the byte arithmetic in size_t: stride * elementSize can exceed INT_MAX.
    const size_t width = static_cast<size_t>(elementSize);
    const size_t height = static_cast<size_t>(numElements);
    const size_t srcPitchInBytes = static_cast<size_t>(srcStride) * width;
    const size_t dstPitchInBytes = static_cast<size_t>(dstStride) * width;

    cudaMemcpy2D(
        dst, dstPitchInBytes,
        src, srcPitchInBytes,
        width, height,
        // C++ has no implicit int -> enum conversion, so the cast is required.
        static_cast<cudaMemcpyKind>(kind));
}
And for your example, it could be called as
cudaMemcpyStrided(dest, 3, source, 2, 3, sizeof(double), cudaMemcpyDeviceToDevice);
"Roughly", because I just translated it on the fly from the (Java/JCuda based) code that I tested it with:
import static jcuda.runtime.JCuda.cudaMemcpy2D;
import java.util.Arrays;
import java.util.Locale;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.runtime.cudaMemcpyKind;
/**
 * Small JCuda test program for strided 1D copies done with a single
 * {@code cudaMemcpy2D} call: every "row" of the 2D copy is one double wide,
 * and the row pitches are the source and destination strides in bytes.
 */
public class JCudaStridedMemcopy {
    /**
     * Runs three example copies with different array lengths and strides and
     * prints the source and destination arrays for each.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        int dstLength = 9;
        int srcLength = 6;
        int dstStride = 3;
        int srcStride = 2;
        int numElements = 3;
        runExample(dstLength, dstStride, srcLength, srcStride, numElements);

        dstLength = 9;
        srcLength = 12;
        dstStride = 3;
        srcStride = 4;
        numElements = 3;
        runExample(dstLength, dstStride, srcLength, srcStride, numElements);

        dstLength = 18;
        srcLength = 12;
        dstStride = 3;
        srcStride = 2;
        numElements = 6;
        runExample(dstLength, dstStride, srcLength, srcStride, numElements);
    }

    /**
     * Fills a source array with 0, 1, 2, ..., performs one strided copy into a
     * zero-initialized destination array and prints both arrays, once as rows
     * of {@code stride} columns and once flat.
     */
    private static void runExample(int dstLength, int dstStride, int srcLength, int srcStride, int numElements) {
        double dst[] = new double[dstLength];
        double src[] = new double[srcLength];
        for (int i = 0; i < src.length; i++) {
            src[i] = i;
        }

        cudaMemcpyStrided(dst, dstStride, src, srcStride, numElements);

        System.out.println("Copy " + numElements + " elements");
        System.out.println(" to array with length " + dstLength + ", with a stride of " + dstStride);
        System.out.println(" from array with length " + srcLength + ", with a stride of " + srcStride);
        System.out.println("");
        System.out.println("Destination:");
        System.out.println(toString2D(dst, dstStride));
        System.out.println("Flat: " + Arrays.toString(dst));
        System.out.println("");
        System.out.println("Source:");
        System.out.println(toString2D(src, srcStride));
        System.out.println("Flat: " + Arrays.toString(src));
        System.out.println("");
        System.out.println("Done");
        System.out.println("");
    }

    /**
     * Copies {@code numElements} doubles from {@code src} (one every
     * {@code srcStride} elements) to {@code dst} (one every {@code dstStride}
     * elements) using a host-to-host {@code cudaMemcpy2D}.
     */
    private static void cudaMemcpyStrided(double dst[], int dstStride, double src[], int srcStride, int numElements) {
        // Widen to long BEFORE multiplying so a large stride cannot overflow int.
        long srcPitchInBytes = (long) srcStride * Sizeof.DOUBLE;
        long dstPitchInBytes = (long) dstStride * Sizeof.DOUBLE;
        // Each row of the 2D copy is exactly one element wide (width is in bytes).
        long width = 1L * Sizeof.DOUBLE;
        long height = numElements;
        cudaMemcpy2D(
            Pointer.to(dst), dstPitchInBytes,
            Pointer.to(src), srcPitchInBytes,
            width, height,
            cudaMemcpyKind.cudaMemcpyHostToHost);
    }

    /**
     * Formats {@code a} as text with {@code columns} values per line, each
     * value printed with the pattern {@code "%4.1f "} in the English locale.
     *
     * @param a       the values to format
     * @param columns number of values per line
     * @return the formatted text, without a trailing newline
     */
    public static String toString2D(double[] a, long columns) {
        String format = "%4.1f ";
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < a.length; i++) {
            // Start a new line before the first value of every row but the first.
            if (i > 0 && i % columns == 0) {
                sb.append("\n");
            }
            sb.append(String.format(Locale.ENGLISH, format, a[i]));
        }
        return sb.toString();
    }
}
To give an idea of what the function does, based on the examples/test cases, here is the output:
Copy 3 elements
to array with length 9, with a stride of 3
from array with length 6, with a stride of 2
Destination:
0.0 0.0 0.0
2.0 0.0 0.0
4.0 0.0 0.0
Flat: [0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 0.0]
Source:
0.0 1.0
2.0 3.0
4.0 5.0
Flat: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
Done
Copy 3 elements
to array with length 9, with a stride of 3
from array with length 12, with a stride of 4
Destination:
0.0 0.0 0.0
4.0 0.0 0.0
8.0 0.0 0.0
Flat: [0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 8.0, 0.0, 0.0]
Source:
0.0 1.0 2.0 3.0
4.0 5.0 6.0 7.0
8.0 9.0 10.0 11.0
Flat: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]
Done
Copy 6 elements
to array with length 18, with a stride of 3
from array with length 12, with a stride of 2
Destination:
0.0 0.0 0.0
2.0 0.0 0.0
4.0 0.0 0.0
6.0 0.0 0.0
8.0 0.0 0.0
10.0 0.0 0.0
Flat: [0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 0.0, 6.0, 0.0, 0.0, 8.0, 0.0, 0.0, 10.0, 0.0, 0.0]
Source:
0.0 1.0
2.0 3.0
4.0 5.0
6.0 7.0
8.0 9.0
10.0 11.0
Flat: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]
Done

Related

How to find the index of a multi-dimensional array which has the maximum value

I have this code sample to find the Maximum Usage days which works like a charm. But I need to get which array index has the maximum value. I tried many methods, but couldn't come up with a solution. This is what I have tried:
#include<stdio.h>
/* Scans a 7x5 table of usage values for its largest entry, then prints that
   value together with whichDayHasMaximumUsage. */
int main(){
/* NOTE(review): minimumDayDayUsage is declared but never used below. */
float maximumDayUsage=0,minimumDayDayUsage=0;
/* NOTE(review): never initialized, yet incremented and printed below. */
int whichDayHasMaximumUsage;
float usage[7][5]={ {1.2, 2.1, 0.8, 0.0, 4.1},
{1.0, 1.3, 4.0, 1.5, 7.8},
{3.2, 1.0, 1.3, 4.0, 9.5},
{1.5, 3.2, 2.3, 0.4, 7.4},
{1.2, 2.1, 0.8, 0.0, 4.1},
{2.6, 2.1, 1.7, 7.0, 13.4},
{1.2, 2.1, 0.8, 0.0, 4.1} };
for(int i=0;i<7;i++){
for(int j=0;j<5;j++){
if(usage[i][j]>maximumDayUsage){
maximumDayUsage=usage[i][j];
/* NOTE(review): this counts how often a new maximum was seen; it does not
   record the indices i and j at which the maximum was found. */
whichDayHasMaximumUsage++;
}else{
}
}
}
printf("Usage: %.2f\n",maximumDayUsage);
printf("Which Day: %d",whichDayHasMaximumUsage);
return 0;
}

Just save the i and j indexes of the maximum usage day when you iterate over the array:
int max_i = 0;
int max_j = 0;
float maximumDayUsage = 0;
for(int i=0;i<7;i++){
for(int j=0;j<5;j++){
if(usage[i][j] > maximumDayUsage) {
maximumDayUsage = usage[i][j];
max_i = i;
max_j = j;
}
}
}

Drawing .obj with VBO

I have a problem with drawing a cube with VBO from a .obj file.
Here is the .obj :
# cube.obj
#
g cube
v 0.0 0.0 0.0
v 0.0 0.0 1.0
v 0.0 1.0 0.0
v 0.0 1.0 1.0
v 1.0 0.0 0.0
v 1.0 0.0 1.0
v 1.0 1.0 0.0
v 1.0 1.0 1.0
vn 0.0 0.0 1.0
vn 0.0 0.0 -1.0
vn 0.0 1.0 0.0
vn 0.0 -1.0 0.0
vn 1.0 0.0 0.0
vn -1.0 0.0 0.0
f 1//2 7//2 5//2
f 1//2 3//2 7//2
f 1//6 4//6 3//6
f 1//6 2//6 4//6
f 3//3 8//3 7//3
f 3//3 4//3 8//3
f 5//5 7//5 8//5
f 5//5 8//5 6//5
f 1//4 5//4 6//4
f 1//4 6//4 2//4
f 2//1 6//1 8//1
f 2//1 8//1 4//1
To draw it, I first read the object with the glmReadOBJ function. Next, I extract the information contained in the generated model (with the "triangulate" function) to be able to create the VBO object and then draw it. Here is what I do:
// Flattens the model's indexed triangles into two parallel arrays: for every
// triangle and each of its 3 corners, one entry is written to vertices[] and
// one to normals[], looked up through the (1-based) vindices / nindices.
// The caller must provide room for model->numtriangles * 3 entries in each array.
void triangulate(GLfloat* vertices, GLfloat* normals, GLMmodel *model)
{
// NOTE(review): these outer i and j are shadowed by the loop declarations below
// and are never used.
int i, j;
int it = 0; // next free slot in vertices[] / normals[]
GLuint *tempN, *tempV;
for (int i = 0; i < model->numtriangles; i++)
{
tempV = model->triangles[i].vindices;
tempN = model->triangles[i].nindices;
for (int j = 0; j < 3; j++)
{
// NOTE(review): exactly one GLfloat is copied per corner. Presumably
// model->vertices / model->normals hold 3 floats (x, y, z) per entry, in
// which case this copies a single component only - confirm against GLM.
vertices[it] = model->vertices[tempV[j] - 1];
normals[it] = model->normals[tempN[j] - 1];
it++;
}
}
}
// Builds temporary vertex and normal arrays with triangulate(), generates one
// buffer object whose id is stored through vboId, and uploads the vertices
// followed by the normals into it.
void glmInitVBO(GLMmodel* model, int* vboId)
{
// NOTE(review): neither allocation is checked for NULL, and neither is freed
// in this function.
GLfloat *vertices = (GLfloat*)malloc(model->numtriangles * 3 * sizeof(GLfloat));
GLfloat *normals = (GLfloat*)malloc(model->numtriangles * 3 * sizeof(GLfloat));
triangulate(vertices, normals, model);
glGenBuffersARB(1, vboId);
// NOTE(review): vboId is the pointer itself here; glmDrawVBO binds *vboId.
glBindBufferARB(GL_ARRAY_BUFFER_ARB, vboId);
// NOTE(review): vertices and normals are GLfloat* variables, so every
// sizeof(vertices) / sizeof(normals) below is the size of a pointer, not the
// number of bytes that were malloc'ed above.
glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(vertices) + sizeof(normals), 0, GL_STATIC_DRAW_ARB);
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(vertices),vertices); // copy vertices starting from offset 0
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, sizeof(vertices), sizeof(normals), normals); // copy normals after vertices
}
// Binds the buffer object *vboId, points the fixed-function normal and vertex
// arrays into it, and draws 36 vertices as triangles.
void glmDrawVBO(GLMmodel* model, int* vboId)
{
// NOTE(review): both arrays are allocated and filled again on every call, are
// only used below as the operand of sizeof, and are never freed here.
GLfloat *vertices = (GLfloat*)malloc(model->numtriangles * 3 * sizeof(GLfloat));
GLfloat *normals = (GLfloat*)malloc(model->numtriangles * 3 * sizeof(GLfloat));
triangulate(vertices, normals, model);
glBindBufferARB(GL_ARRAY_BUFFER_ARB, *vboId);
glEnableClientState(GL_NORMAL_ARRAY);
glEnableClientState(GL_VERTEX_ARRAY);
// NOTE(review): sizeof(vertices) is the size of a GLfloat* pointer, so the
// normal array is declared to start that many bytes into the buffer.
glNormalPointer(GL_FLOAT, 0, (void*)sizeof(vertices));
glVertexPointer(3, GL_FLOAT, 0, 0);
// NOTE(review): the vertex count 36 is hard-coded rather than derived from
// model->numtriangles.
glDrawArrays(GL_TRIANGLES, 0, 36);
glDisableClientState(GL_VERTEX_ARRAY); // disable vertex arrays
glDisableClientState(GL_NORMAL_ARRAY); // disable normal arrays
glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); // unbind the buffer object
}
In my main, I call glmReadOBJ, then glmInitVBO and finally glmDrawVBO but nothing happens : the window remains black, nothing is drawn.
I don't know what I did wrong and tried many things for hours, but the only thing I obtain in the end is a black window ...
Thank you for your help !

The main issue I see is unrelated to your OpenGL usage, but just a misunderstanding of how the sizeof operator works. For example in this code segment (there are more similar cases in the rest of the code):
GLfloat *vertices = (GLfloat*)malloc(model->numtriangles * 3 * sizeof(GLfloat));
GLfloat *normals = (GLfloat*)malloc(model->numtriangles * 3 * sizeof(GLfloat));
...
glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(vertices) + sizeof(normals), 0, GL_STATIC_DRAW_ARB);
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(vertices),vertices); // copy vertices starting from 0 offest
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, sizeof(vertices), sizeof(normals), normals); // copy normals after vertices
vertices and normals are declared as pointer variables. Their sizes are therefore 32 bits (4 bytes) when building in 32-bit mode, and 64 bits (8 bytes) when building in 64-bit mode. So 4/8 are the values you get when using the sizeof operator on the variables.
What you need to pass to the glBufferData() and glBufferSubData() functions is the actual size of the data you allocated, not the size of the pointer. For example:
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, model->numtriangles * 3 * sizeof(GLfloat), vertices);
Another problem is with this call in glmInitVBO():
glBindBufferARB(GL_ARRAY_BUFFER_ARB, vboId);
where vboId is declared as a pointer to the int value that contains the VBO id. However, glBindBuffer() takes the id as an argument, not a pointer to the id. So the call should be:
glBindBufferARB(GL_ARRAY_BUFFER_ARB, *vboId);
BTW, these functions have been part of standard OpenGL since version 1.5. There really shouldn't be a need to use the extension version.

SSE: conditionally replace pixel

I'm trying to vectorize some code.
Idea: we have a pixel (__m128 in); if any of its elements is bigger than upper, replace the entire pixel with a different pixel (__m128 upper_color).
Unvectorized code that works:
/* Scalar reference: if any of the first three components of inp is >= upper,
   overwrite the first three components of outp with upper_color. Component 3
   is neither tested nor written. */
if(inp[0] >= upper || inp[1] >= upper || inp[2] >= upper)
{
outp[0] = upper_color[0];
outp[1] = upper_color[1];
outp[2] = upper_color[2];
}
So far i came up with following, but (i believe so) it replaces not entire pixel, but only those components that are bigger than upper:
/* Load one pixel (4 floats) from in. */
const __m128 pixel = _mm_load_ps(in);
/* Lane-wise comparison pixel >= upper; isoe is used as a per-lane select mask. */
const __m128 isoe = _mm_cmpge_ps(pixel, upper);
/* Per-lane select: (~isoe & pixel) | (isoe & upper_color).
   NOTE(review): each lane is selected by its own mask lane only, so just the
   components that compared >= upper are replaced, not the whole pixel. */
__m128 result = _mm_or_ps(_mm_andnot_ps(isoe, pixel),
_mm_and_ps(isoe, upper_color));
_mm_stream_ps(out, result);
Let's assume upper = 1,1,1 and upper_color = 1,0,0
Fourth channel is alpha, so i do not care about it.
Results:
IN: 0.5 0.3 0.7
OUT: 0.5 0.3 0.7 (Expected)
OUT: 0.5 0.3 0.7 (Recieved)
IN: 1.5 1.1 0.7
OUT: 1 0 0 (Expected)
OUT: 1 0 0.7 (Recieved)
Maybe someone could help me? Is this even possible?

You need to compute horizontal OR. There is no horizontal OR instruction in SSE, but such operation can be simulated with 2x UNPACK + vertical OR.
const __m128 pixel = _mm_load_ps(in);
/* (p3, p2, p1, p0 ) */
__m128 isoe = _mm_cmpge_ps(pixel, upper);
/* (p3|p1, p2|p0, p3|p1, p2|p0) */
isoe = _mm_or_ps(_mm_unpacklo_ps(isoe, isoe), _mm_unpackhi_ps(isoe, isoe));
/* (p3|p2|p1|p0, p3|p2|p1|p0, p3|p2|p1|p0, p3|p2|p1|p0) */
isoe = _mm_or_ps(_mm_unpacklo_ps(isoe, isoe), _mm_unpackhi_ps(isoe, isoe));
__m128 result = _mm_or_ps(_mm_andnot_ps(isoe, pixel), _mm_and_ps(isoe, upper_color));
_mm_stream_ps(out, result);

You can use _mm_movemask_epi8 to do a horizontal OR.
#include <stdio.h>
#include <emmintrin.h>
/* Prints the pixel in ina[0..3], or the replacement color (1,0,0,0) when any
   of the first three components of the pixel is >= 1.

   ina must point to at least 4 floats; no particular alignment is required.
   The fourth component (alpha) never triggers the replacement. */
void foo(float ina[]) {
    //float ina[] = {0.5, 0.3, 0.7, 0};
    float uppera[] = {1,1,1,1};
    float upper_colora[] = {1,0,0,0};
    float out[4];

    /* Unaligned loads/stores: none of these arrays is guaranteed to be
       16-byte aligned, which _mm_load_ps / _mm_stream_ps would require. */
    const __m128 pixel = _mm_loadu_ps(ina);
    const __m128 upper = _mm_loadu_ps(uppera);
    const __m128 upper_color = _mm_loadu_ps(upper_colora);

    /* Per-lane mask: all ones in every lane where pixel >= upper. */
    const __m128 isoe = _mm_cmpge_ps(pixel, upper);

    /* _mm_movemask_epi8 yields one bit per byte; bits 0..11 belong to lanes
       0..2, bits 12..15 to the alpha lane, which is ignored on purpose. */
    const int rgb_bytes = 0x0FFF;
    if(_mm_movemask_epi8(_mm_castps_si128(isoe)) & rgb_bytes) {
        _mm_storeu_ps(out, upper_color);
    }
    else {
        _mm_storeu_ps(out, pixel);
    }
    printf("%f %f %f %f\n", out[0], out[1], out[2], out[3]);
}
/* Driver: feeds two sample pixels to foo(), one below the threshold and one
   with a component above it. */
int main() {
    float pixels[2][4] = {
        {0.5, 0.3, 0.7, 0}, //output 0.5 0.3 0.7 0.0
        {0.5, 1.1, 0.7, 0}  //output 1.0 0.0 0.0 0.0
    };
    for (int k = 0; k < 2; ++k) {
        foo(pixels[k]);
    }
}

3D identity matrix to correctly set vertices

I'm playing around with matrices, with a view to doing 3D transformations in GDI (for the fun of it). At the moment I'm checking that I'm getting the right values from an identity matrix given a representation of four vertices arranged in a square. I've been scratching my head as to why it's not giving the expected output. I have done my research but can't see what I am doing wrong here.
Here's my definition of matrix.
/* 4x4 float matrix. The fields are numbered _m01.._m16 down the columns:
   each declaration line below is one row, so the first row holds
   _m01, _m05, _m09, _m13. */
typedef struct m{
    float _m01, _m05, _m09, _m13;
    float _m02, _m06, _m10, _m14;
    float _m03, _m07, _m11, _m15;
    float _m04, _m08, _m12, _m16;
}mat;

/* Overwrites *m1 with the identity matrix (ones on the diagonal _m01, _m06,
   _m11, _m16, zeros elsewhere) and returns a copy of the result.
   m1 must not be NULL. */
struct m matIdentity(struct m *m1){
    m1->_m01 = 1.0f; m1->_m05 = 0.0f; m1->_m09 = 0.0f; m1->_m13 = 0.0f;
    m1->_m02 = 0.0f; m1->_m06 = 1.0f; m1->_m10 = 0.0f; m1->_m14 = 0.0f;
    m1->_m03 = 0.0f; m1->_m07 = 0.0f; m1->_m11 = 1.0f; m1->_m15 = 0.0f;
    m1->_m04 = 0.0f; m1->_m08 = 0.0f; m1->_m12 = 0.0f; m1->_m16 = 1.0f;
    /* The function is declared to return a struct m, so it has to return one. */
    return *m1;
}
Here's making use of matrix with
/* Sets matrix to identity, multiplies each of the 4 vertices in square by it,
   and formats the 4x4 result into strOutput. */
struct m matrix;
matIdentity(&matrix);
//represent 4 vertices(x,y,z,w);
float square[4][4] = {
{0.0, 0.0, 0.0, 1.0},
{0.0, 20.0, 0.0, 1.0},
{20.0, 20.0, 0.0, 1.0},
{20.0, 0.0, 0.0, 1.0}
};
float result[4][4];
int i = 0;
for(i = 0; i < 4; i++){
/* NOTE(review): the four results are stored in columns 1..4, but each row of
   result has only columns 0..3. result[i][0] is never assigned, and
   result[i][4] lies past the end of row i. */
result[i][1] = (matrix._m01 * square[i][0]) + (matrix._m05 * square[i][1]) + (matrix._m09 * square[i][2]) + (matrix._m13 * square[i][3]);
result[i][2] = (matrix._m02 * square[i][0]) + (matrix._m06 * square[i][1]) + (matrix._m10 * square[i][2]) + (matrix._m14 * square[i][3]);
result[i][3] = (matrix._m03 * square[i][0]) + (matrix._m07 * square[i][1]) + (matrix._m11 * square[i][2]) + (matrix._m15 * square[i][3]);
result[i][4] = (matrix._m04 * square[i][0]) + (matrix._m08 * square[i][1]) + (matrix._m12 * square[i][2]) + (matrix._m16 * square[i][3]);
}
char strOutput[500];
/* Prints columns 0..3 of every row, i.e. including the never-assigned column 0. */
sprintf(strOutput,"%f %f %f %f\n %f %f %f %f\n %f %f %f %f\n %f %f %f %f\n ",
result[0][0], result[0][1], result[0][2], result[0][3],
result[1][0], result[1][1], result[1][2], result[1][3],
result[2][0], result[2][1], result[2][2], result[2][3],
result[3][0], result[3][1], result[3][2], result[3][3]
);
I have a feeling the problem is somewhere to do with multiplying a row based representation of vertices using a column major matrix. Can anyone please suggest how i should be doing this.

I don't understand why you don't use array first, then start to use array and iteration, and in the end give up iteration. Please, such program can only cause confusion.
The correct formula is C(i, j)=sigma(A(i, k)*B(k, j), k=1..n), where C=AB and n is 4 for your case.
(e.g., this line should be like: result[i][0] = (matrix._m01 * square[0][i]) + (matrix._m02 * square[1][i]) + (matrix._m03 * square[2][i]) + (matrix._m04 * square[3][i]); ) Write a simple nested for-loop to calculate this...
This is not for one vector, but n vectors....

This is not matrix multiplication. Multiplying a vector by a matrix goes like this:
/* Row vector times 4x4 matrix: vec_out[col] = sum over row of
   vec_in[row] * mat[row][col]. */
float mat[4][4];
float vec_in[4];
float vec_out[4];
// todo: initialize values
for (int col = 0; col < 4; ++col)
{
    /* Accumulate one output component, then store it. */
    float acc = 0.0f;
    for (int row = 0; row < 4; ++row)
    {
        acc += vec_in[row] * mat[row][col];
    }
    vec_out[col] = acc;
}

Storing data in a program instead of in an external file

I have part of the following C code that uses data from a file named WMM.COF and uses the data stored in the file to compute the magnetic field of the earth. The program works perfectly, except that I can't have the program access the external file; I want to have all of the data already stored in the program. I tried using a structure array to replicate the data and then put the array into a string, but this causes an error in the program and doesn't produce the correct results. Here is the code of the program that I'm trying to modify.
static void E0000(int IENTRY, int *maxdeg, double alt, double glat, double glon, double time, double *dec, double *dip, double *ti, double *gv)
{
static int maxord,i,icomp,n,m,j,D1,D2,D3,D4;
static double c[13][13],cd[13][13],tc[13][13],dp[13][13],snorm[169],
sp[13],cp[13],fn[13],fm[13],pp[13],k[13][13],pi,dtr,a,b,re,
a2,b2,c2,a4,b4,c4,epoch,gnm,hnm,dgnm,dhnm,flnmj,otime,oalt,
olat,olon,dt,rlon,rlat,srlon,srlat,crlon,crlat,srlat2,
crlat2,q,q1,q2,ct,st,r2,r,d,ca,sa,aor,ar,br,bt,bp,bpp,
par,temp1,temp2,parp,bx,by,bz,bh;
static char model[20], c_str[81], c_new[5];
static double *p = snorm;
char answer;
FILE *wmmdat;
wmmdat = fopen("WMM.COF","r");
/* INITIALIZE CONSTANTS */
maxord = *maxdeg;
sp[0] = 0.0;
cp[0] = *p = pp[0] = 1.0;
dp[0][0] = 0.0;
a = 6378.137;
b = 6356.7523142;
re = 6371.2;
a2 = a*a;
b2 = b*b;
c2 = a2-b2;
a4 = a2*a2;
b4 = b2*b2;
c4 = a4 - b4;
/* READ WORLD MAGNETIC MODEL SPHERICAL HARMONIC COEFFICIENTS */
c[0][0] = 0.0;
cd[0][0] = 0.0;
fgets(c_str, 80, wmmdat);
S3:
if (fgets(c_str, 80, wmmdat) == NULL) goto S4;
/* CHECK FOR LAST LINE IN FILE */
for (i=0; i<4 && (c_str[i] != '\0'); i++)
{
c_new[i] = c_str[i];
c_new[i+1] = '\0';
}
icomp = strcmp("9999", c_new);
if (icomp == 0) goto S4;
/* END OF FILE NOT ENCOUNTERED, GET VALUES */
sscanf(c_str,"%d%d%lf%lf%lf%lf",&n,&m,&gnm,&hnm,&dgnm,&dhnm);
if (n > maxord) goto S4;
if (m > n || m < 0.0)
{
fprintf(stderr, "Corrupt record in model file WMM.COF\n");
exit(1);
}
if (m <= n)
{
c[m][n] = gnm;
cd[m][n] = dgnm;
if (m != 0)
{
c[n][m-1] = hnm;
cd[n][m-1] = dhnm;
}
}
goto S3;
/* CONVERT SCHMIDT NORMALIZED GAUSS COEFFICIENTS TO UNNORMALIZED */
S4:
*snorm = 1.0;
fm[0] = 0.0;
for (n=1; n<=maxord; n++)
{
*(snorm+n) = *(snorm+n-1)*(double)(2*n-1)/(double)n;
j = 2;
for (m=0,D1=1,D2=(n-m+D1)/D1; D2>0; D2--,m+=D1)
{
k[m][n] = (double)(((n-1)*(n-1))-(m*m))/(double)((2*n-1)*(2*n-3));
if (m > 0)
{
flnmj = (double)((n-m+1)*j)/(double)(n+m);
*(snorm+n+m*13) = *(snorm+n+(m-1)*13)*sqrt(flnmj);
j = 1;
c[n][m-1] = *(snorm+n+m*13)*c[n][m-1];
cd[n][m-1] = *(snorm+n+m*13)*cd[n][m-1];
}
c[m][n] = *(snorm+n+m*13)*c[m][n];
cd[m][n] = *(snorm+n+m*13)*cd[m][n];
}
fn[n] = (double)(n+1);
fm[n] = (double)n;
}
k[1][1] = 0.0;
otime = oalt = olat = olon = -1000.0;
fclose(wmmdat);
return;
The code that I came up with to include the data in the program is as follows:
/* One record of the embedded coefficient table. The six fields are printed
   below in the same order in which the final sscanf reads n, m, gnm, hnm,
   dgnm and dhnm. */
struct wmm
{
int alpha;
int beta;
float gamma;
float delta;
float epsilon;
float zeta;
}book[90]= {{1, 0, -29496.6, 0.0, 11.6, 0.0},
{1, 1, -1586.3, 4944.4, 16.5, -25.9},
{2, 0, -2396.6, 0.0, -12.1, 0.0},
{2, 1, 3026.1, -2707.7, -4.4, -22.5},
{2, 2, 1668.6, -576.1, 1.9, -11.8},
{3, 0, 1340.1, 0.0, 0.4, 0.0},
/* 50+ similar lines of code */
{12, 8, -0.4, 0.1, 0.0, 0.0},
{12, 9, -0.4, 0.3, 0.0, 0.0},
{12, 10, 0.2, -0.9, 0.0, 0.0},
{12, 11, -0.8, -0.2, -0.1, 0.0},
{12, 12, 0.0, 0.9, 0.1, 0.0}};
/* Appends the text form of every record to c_str, one after another.
   NOTE(review): offset and buf_size are not declared in the code shown. In the
   original listing c_str is a char[81], so unless buf_size is at most 81 the
   writes run past the end of c_str after the first few records - confirm. */
for (i = 0; i < 90 && offset < buf_size; i++)
{
offset += snprintf(c_str + offset,buf_size - offset, "%d %d %7.1lf %7.1lf %7.1lf %7.1lf \n", book[i].alpha, book[i].beta , book[i].gamma , book[i].delta, book[i].epsilon, book[i].zeta);
}
/* A single sscanf call starting at the beginning of c_str: it reads six values
   once, i.e. only the first record. */
sscanf(c_str,"%d%d%lf%lf%lf%lf",&n,&m,&gnm,&hnm,&dgnm,&dhnm);
The problem is that the snprintf causes the program to freeze and terminate every time it is placed in the program. When the code that I wrote is run on its own, it seems to create c_str properly, except that when I try to view the variables n, m, gnm, hnm, dgnm, and dhnm only a single value for each is displayed.

I need to continue in an answer due to a lack of space/formatting in a comment.
First of all, you do have 90 entries but you can let the compiler figure out how many entries the book array needs:
/* Embedded copy of the coefficient table. Each record holds the six values of
   one data line in the order n, m, gnm, hnm, dgnm, dhnm.

   The four coefficients are double, not float: the original code read them
   with sscanf("%lf") into double variables, and a float cannot hold a value
   such as -29496.6 without rounding it to about -29496.5996.

   The array length is left to the compiler; use
   sizeof(book)/sizeof(book[0]) for the number of records. */
struct wmm {
    int alpha;      /* n */
    int beta;       /* m */
    double gamma;   /* gnm */
    double delta;   /* hnm */
    double epsilon; /* dgnm */
    double zeta;    /* dhnm */
} book[] = {
    {1, 0, -29496.6, 0.0, 11.6, 0.0},
    {1, 1, -1586.3, 4944.4, 16.5, -25.9},
    /* ... */
    {12, 12, 0.0, 0.9, 0.1, 0.0}
};
And, more importantly, you don't need to put them in a string and pull them back out when you already have them on hand:
/* Visit every record of book in order and unpack its six fields into the
   variables the original code filled from one line of the data file. */
for(i = 0; i < sizeof(book)/sizeof(book[0]); ++i) {
    const struct wmm *rec = &book[i];

    n = rec->alpha;
    m = rec->beta;
    gnm = rec->gamma;
    hnm = rec->delta;
    dgnm = rec->epsilon;
    dhnm = rec->zeta;
    /* Do whatever you need to do with the above variables. */
}
This will neatly side step whatever buffer overflow you were causing with your snprintf.
Your c_str is only a char[81] and you're going through your loop 90 times and incrementing your offset into c_str each time; so, you'll run off the end of c_str before long and then you'll tell snprintf to scribble all over unallocated memory. Hence your segfault.