I have this functions, the result is correct but the compiler don't vectorize this.
How can I achive that the compiler vectorize this and how can I optimize this codes?
void LongNumSet( char *L, unsigned N, char digit )
{
for (int i = 0; i < N; ++i){
L[i] = digit;
}
}
void LongNumCopy( char *Vin, char *Vout, unsigned N )
{
for ( int i=0; i< N; ++i )
{
Vout[i] = Vin[i];
}
}
char LongNumAddition( char *__restrict Vin1, char * __restrict Vin2, char * __restrict Vout, unsigned N )
{
char CARRY = 0,R,aux;
Vin1 = (char*)__builtin_assume_aligned (Vin1, 1);
Vin2 = (char*)__builtin_assume_aligned (Vin2, 1);
for ( int i=0; i< N; ++i )
{
char R = Vin1[i] + Vin2[i] + CARRY;
aux = R <= 9;
Vout[i] = (aux) ? R:R-ten;
CARRY = (aux) ? 0:1;
}
return CARRY;
}
char LongNumAddDigit( char *V, char digit, unsigned N )
{
int i=0;
char R = V[0] + digit;
if ( R < ten){
V[0] = R;
return 0;
}
V[0] = R-ten;
// add carry, maybe iteratively for all digits
char CARRY = 1;
i = 1;
while ( CARRY && i < N )
{
if ( V[i] < 9 )
{
V[i]++;
CARRY = 0;
}
else
{
V[i] = 0;
i++; // CARRY remains set to 1
}
}
return CARRY;
}
I use the comand gcc -O3 -ffast-math -msse -funroll-all-loops -ftree-vectorizer-verbose=25 -lm -g $1 -o ${2}.O3 and I executate the program in 55 s.
This is all of code:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
// Variable used to generate pseudo-random numbers
unsigned int seed;
unsigned int temp;
unsigned int var1 = 214013;
unsigned int var2 = 2531011;
#define val13 13
#define ten 10
// Function to generate pseudo-random numbers
inline int myRandom() {
temp = var1*seed;
seed = temp + var2;
return (seed>>val13);
}
void LongNumInit( char *L, unsigned N )
{
for ( int i=0; i<N;++i )
{
L[i] = myRandom() % ten; // digito decimal
}
}
void LongNumPrint( char *L, unsigned N, char *Name )
{
printf("%s:", Name);
for ( int i=N; i>0; i-- )
{
printf("%d", L[i-1]);
}
printf("\n");
}
void LongNumSet( char *L, unsigned N, char digit )
{
for (int i = 0; i < N; ++i){
L[i] = digit;
}
}
void LongNumCopy( char *Vin, char *Vout, unsigned N )
{
for ( int i=0; i< N; ++i )
{
Vout[i] = Vin[i];
}
}
char LongNumAddition( char *__restrict Vin1, char * __restrict Vin2, char * __restrict Vout, unsigned N )
{
char CARRY = 0,R,aux;
Vin1 = (char*)__builtin_assume_aligned (Vin1, 1);
Vin2 = (char*)__builtin_assume_aligned (Vin2, 1);
for ( int i=0; i< N; ++i )
{
char R = Vin1[i] + Vin2[i] + CARRY;
aux = R <= 9;
Vout[i] = (aux) ? R:R-ten;
CARRY = (aux) ? 0:1;
}
return CARRY;
}
char LongNumAddDigit( char *V, char digit, unsigned N )
{
int i=0;
char R = V[0] + digit;
if ( R < ten){
V[0] = R;
return 0;
}
V[0] = R-ten;
// add carry, maybe iteratively for all digits
char CARRY = 1;
i = 1;
while ( CARRY && i < N )
{
if ( V[i] < 9 )
{
V[i]++;
CARRY = 0;
}
else
{
V[i] = 0;
i++; // CARRY remains set to 1
}
}
return CARRY;
}
char LongNumHorizAdd( char *Vin, char *Vout, unsigned N )
{
char CARRY = 0;
LongNumSet ( Vout, N, 0 );
for ( int i=0; i< N; ++i )
{
LongNumAddDigit ( Vout, Vin[i], N );
}
return 0; // CARRY can never be set
}
char LongNumConstMult( char *V, unsigned N, char digit )
{
char CARRY = 0;
char ja = 0;
for ( int i=0; i< N; ++i )
{
char aux = V[i] * digit;
char R = aux + CARRY;
CARRY = ((u_int32_t)R * (u_int32_t)0xCCCD) >> 19;
ja = (CARRY << 3) + 2*CARRY;
R -= ja;
V[i] = R;
}
return CARRY; // may be from 0 to 9
}
void LongNumMultiply( char *Vin1, char *Vin2, char *VoutH, char *VoutL, unsigned N )
{
// Create Temporal Long Integer with double size
unsigned char *TEMP = (unsigned char*) calloc(2*N,sizeof(unsigned char));
unsigned char *RES = (unsigned char*) calloc( 2*N,sizeof(unsigned char) );
LongNumSet ( RES, 2*N, 0 ); // Set RES to 0
for ( int i=0; i<N; ++i )
{
LongNumSet ( TEMP, 2*N, 0 ); // Set TEMP to 0
LongNumCopy ( Vin1, TEMP+i, N ); // Copy Vin1 -> TEMP, with offset i
LongNumConstMult( TEMP, 2*N, Vin2[i] ); // TEMP * Vin2[i] -> TEMP
LongNumAddition ( TEMP, RES, RES, 2*N ); // TEMP + RES -> RES
}
// Result goes to VoutH-VoutL
LongNumCopy ( RES, VoutL, N ); // Copy RES -> VoutL
LongNumCopy ( RES+N, VoutH, N ); // Copy RES+N -> VoutH
}
int main (int argc, char **argv)
{
int i, sum1, sum2, sum3, N=10000, Rep=50;
seed = 12345;
// obtain parameters at run time
if (argc>1) { N = atoi(argv[1]); }
if (argc>2) { Rep = atoi(argv[2]); }
printf("Challenge #3: Vector size is %d. Repeat %d times\n", N, Rep);
// Create Long Nums
unsigned char *V1= (unsigned char*) malloc( N*sizeof(unsigned char) );
unsigned char *V2= (unsigned char*) malloc( N*sizeof(unsigned char) );
unsigned char *V3= (unsigned char*) malloc( N*sizeof(unsigned char) );
unsigned char *V4= (unsigned char*) malloc( N*sizeof(unsigned char) );
LongNumInit ( V1, N ); LongNumInit ( V2, N ); LongNumInit ( V3, N );
// Repeat
for (i=0; i<Rep; i++)
{
LongNumAddition ( V1, V2, V4, N );
LongNumMultiply ( V3, V4, V2, V1, N );
LongNumHorizAdd ( V1, V2, N );
LongNumAddDigit ( V3, V2[0], N );
}
// Print last 32 digits of Long Numbers
LongNumPrint( V1, 32, "V1" );
LongNumPrint( V2, 32, "V2" );
LongNumPrint( V3, 32, "V3" );
LongNumPrint( V4, 32, "V4" );
free(V1); free(V2); free(V3); free(V4);
return 0;
}
Acording to your usage, instead of LongNumSet you could create and use LongNumClear(not much improvement).
Below are some other potential rewrites of some of your functions. I think you should notice some improvements. For me it's around 44%. I also changed the type from char to unsigned.
#include <string.h>
void LongNumClear(uint8_t *L, size_t N) {
memset (L, 0, N);
}
void LongNumCopy(const uint8_t *Vin, uint8_t *Vout, size_t N) {
memcpy(Vout, Vin, N);
}
uint8_t LongNumAddition(uint8_t * Vin1, uint8_t * Vin2, uint8_t * Vout, size_t N) {
uint8_t carry = 0;
for (size_t i=0; i < N; ++i) {
Vout[i] = Vin1[i] + Vin2[i] + carry;
carry = (Vout[i] > 9);
if (carry) {
Vout[i] -= ten;
}
}
return carry;
}
uint8_t LongNumAddDigit(uint8_t *V, uint8_t digit, size_t N) {
size_t i=0;
V[0] += digit;
if (V[0] < ten) {
return 0;
}
V[0] -= ten;
while ((++i < N) && (V[i] >= 9)) {
V[i] = 0;
}
if ((i != N) && (V[i] < 9)) {
V[i]++;
return 0;
}
return 1;
}
uint8_t LongNumConstMult(uint8_t *V, size_t N, uint8_t digit) {
uint8_t carry = 0;
for (size_t i = 0; i < N; ++i ) {
V[i] = V[i] * digit + carry;
carry = ((u_int32_t)V[i] * (u_int32_t)0xCCCD) >> 19; // divide by 10
V[i] -= ((carry << 3) + (carry << 1));
}
return carry;
}
Related
I'm trying to make a program which crosses binary numbers. The problem is with the cross function. It accepts two binary sequences and returns 5 sequences which are the result of crossing the arguments. Somewhy, the first of these sequences has a mess of values, and I cannot really solve this problem. Does anyone have any ideas?
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define BINARY_LEN 5
#define POPULATION 5
// #define CROSS_BINARY_LIMIT 3
unsigned randrange(unsigned lower, unsigned upper)
{
return lower + rand() / (RAND_MAX / (upper - lower + 1) + 1);
}
unsigned char *int_to_bin(unsigned number)
{
unsigned char *binary = malloc(BINARY_LEN);
unsigned count = 0;
while (number > 0)
{
binary[count] = number % 2;
number /= 2;
count++;
}
return binary;
}
unsigned char **cross(unsigned char *parent_1, unsigned char *parent_2)
{
unsigned char **offspring = malloc(POPULATION);
unsigned cross_binary_point;
for (unsigned char i = 0; i < POPULATION; i++)
{
cross_binary_point = randrange(0, BINARY_LEN);
offspring[i] = malloc(BINARY_LEN);
for (unsigned char j = 0; j < BINARY_LEN; j++)
{
if (j < cross_binary_point)
{
offspring[i][j] = parent_1[j];
}
else
{
offspring[i][j] = parent_2[j];
}
}
}
return offspring;
}
int main(void)
{
unsigned char *x = int_to_bin(14);
unsigned char *y = int_to_bin(18);
for (unsigned char i = BINARY_LEN; i > 0; i--)
{
printf("%hhu", x[i - 1]);
}
printf("\n");
for (unsigned char i = BINARY_LEN; i > 0; i--)
{
printf("%hhu", y[i - 1]);
}
printf("\n\n");
unsigned char **ofspr = cross(x, y);
printf("%s\n", ofspr[0]); // Try to check out what's wrong with the first array
for (unsigned char i = 0; i < POPULATION; i++)
{
for (unsigned char j = BINARY_LEN; j > 0; j--)
{
printf("%hhu", ofspr[i][j]);
}
printf("\n");
}
free(ofspr);
free(x);
free(y);
}
The output is like this:
01110
10010
`w;
00059119
01011
01001
01111
01011
Maybe there is some memory conflict stuff, but I do not have any ideas
unsigned char **offspring = malloc(POPULATION);
only allocates 5 bytes, you want 5 pointers
should be
unsigned char **offspring = malloc(POPULATION * sizeof(char*));
I'm writing a program which operates with arrays of Big Integer Numbers and does basic operations.
I'm worried about the performance of my code. It executes in 47 seconds when compiled with:
gcc -Ofast -funroll-all-loops -ftree-vectorize -fopt-info-vec -g -lm $1 -o ${2}.Ofast
I thought the key to solving my problem is changing the data type of my arrays to unsigned long long, but when I do the result is different.
Any suggestion is welcome, even if it changes my whole program or data types as long as it doesn't change the outcome of my program.
Also, I have been able to visualize that my performance problem is mainly in the LongNumAddition and LongNumAddDigit functions, how can I improve my code? Thanks for your attention.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>
// Variable used to generate pseudo-random numbers
unsigned int seed;
unsigned int temp;
unsigned int var1 = 214013;
unsigned int var2 = 2531011;
#define val13 13
#define ten 10
// Function to generate pseudo-random numbers
inline int myRandom() {
temp = var1*seed;
seed = temp + var2;
return (seed>>val13);
}
void LongNumInit( uint8_t *L, size_t N )
{
for(size_t i = 0; i < N; ++i){
L[i] = myRandom() % 10;
}
}
void LongNumPrint( uint8_t *L, size_t N, uint8_t *Name )
{
printf("%s:", Name);
for ( size_t i=N; i>0;--i )
{
printf("%d", L[i-1]);
}
printf("\n");
}
void LongNumSet( uint8_t *L, size_t N){
memset(L,0,N);
}
void LongNumCopy( const uint8_t *Vin, uint8_t *Vout, size_t N )
{
memcpy(Vout,Vin,N);
}
uint8_t LongNumAddition( uint8_t *Vin1, uint8_t *Vin2,uint8_t *Vout, size_t N)
{
uint8_t CARRY = 0;
for ( size_t i=0; i< N; ++i )
{
Vout[i] = Vin1[i] + Vin2[i] + CARRY;
CARRY = (Vout[i] > 9);
if(CARRY){
Vout[i] -= ten;
}
}
return CARRY;
}
uint8_t LongNumAddDigit( uint8_t *V, uint8_t digit, size_t N )
{
size_t i=0;
V[0] += digit;
if ( V[0] < ten){
return 0;
}
V[0] -=ten;
// add carry, maybe iteratively for all digits
while ((++i < N) && (V[i] >= 9))
{
V[i] = 0;
}
if((i != N) && (V[i] < 9)){
V[i]++;
return 0;
}
return 1;
}
uint8_t LongNumHorizAdd( uint8_t *Vin, uint8_t *Vout, size_t N )
{
uint8_t CARRY = 0;
LongNumSet ( Vout, N);
for ( size_t i=0; i< N; ++i )
{
LongNumAddDigit ( Vout, Vin[i], N );
}
return 0; // CARRY can never be set
}
uint8_t LongNumConstMult( uint8_t *V, size_t N, uint8_t digit )
{
uint8_t CARRY = 0;
for ( size_t i=0; i< N; ++i )
{
V[i] = V[i] * digit + CARRY;
CARRY = ((u_int32_t)V[i] * (u_int32_t)0xCCCD) >> 19;
V[i] -= (CARRY << 3) + (CARRY << 1);
}
return CARRY; // may be from 0 to 9
}
void LongNumMultiply( uint8_t *Vin1, uint8_t *Vin2, uint8_t *VoutH, uint8_t *VoutL, size_t N)
{
// Create Temporal Long Integer with double size
uint8_t *TEMP = (uint8_t*) malloc(2*N*sizeof(uint8_t));
uint8_t *RES = (uint8_t*) malloc( 2*N*sizeof(uint8_t));
LongNumSet ( RES, 2*N); // Set RES to 0
for ( size_t i=0; i<N; ++i )
{
LongNumSet ( TEMP, 2*N); // Set TEMP to 0
LongNumCopy ( Vin1, TEMP+i, N ); // Copy Vin1 -> TEMP, with offset i
LongNumConstMult( TEMP, 2*N, Vin2[i] ); // TEMP * Vin2[i] -> TEMP
LongNumAddition ( TEMP, RES, RES, 2*N ); // TEMP + RES -> RES
}
// Result goes to VoutH-VoutL
LongNumCopy ( RES, VoutL, N ); // Copy RES -> VoutL
LongNumCopy ( RES+N, VoutH, N ); // Copy RES+N -> VoutH
}
int main (int argc, char **argv)
{
int i, sum1, sum2, sum3, N=10000, Rep=50;
seed = 12345;
// obtain parameters at run time
if (argc>1) { N = atoi(argv[1]); }
if (argc>2) { Rep = atoi(argv[2]); }
printf("Challenge #3: Vector size is %d. Repeat %d times\n", N, Rep);
// Create Long Nums
unsigned char *V1= (unsigned char*) malloc( N*sizeof(unsigned char) );
unsigned char *V2= (unsigned char*) malloc( N*sizeof(unsigned char) );
unsigned char *V3= (unsigned char*) malloc( N*sizeof(unsigned char) );
unsigned char *V4= (unsigned char*) malloc( N*sizeof(unsigned char) );
LongNumInit ( V1, N ); LongNumInit ( V2, N ); LongNumInit ( V3, N );
// Repeat
for (i=0; i<Rep; i++)
{
LongNumAddition ( V1, V2, V4, N );
LongNumMultiply ( V3, V4, V2, V1, N );
LongNumHorizAdd ( V1, V2, N );
LongNumAddDigit ( V3, V2[0], N );
}
// Print last 32 digits of Long Numbers
LongNumPrint( V1, 32, "V1" );
LongNumPrint( V2, 32, "V2" );
LongNumPrint( V3, 32, "V3" );
LongNumPrint( V4, 32, "V4" );
free(V1); free(V2); free(V3); free(V4);
return 0;
}
Use a profiler - strongly recommend kcachegrind
https://kcachegrind.github.io/html/Usage.html
first, install valgrind and kcachegrind, then compile your binary using '-g' using gcc, and then run your binary using
valgrind --tool=callgrind ./yourbinary parameters ...
then, run kcachgrind in the current directory, switch view to display line-by-line timing information, like this
https://kcachegrind.github.io/html/Shot4Large.html
this tells you which lines of your code took most of the runtime.
I'm developing a program that read from CSV file and calculate score with a method "calculateMLpa". The method receive array of char and array of 10 float, and transform array of float in matrix 3x3. When read the position 3rd number from array, insert in matrix the 4th number and same for 6th number.
I.E.
array value[]={0.000000;123.814934;234.000000;100.000000;166.000000; 203.086639;383.000000;186.000000;338.000000;173.098419 }
array traj[]={"0-0";"0-1";"0-2";"1-0";"1-1";"1-2";"2-0";"2-1";"2-2"}
Xn_val[]={"0","1","2"}
When transform in matrix the result is:
123.814934 234.000000 166.000000
166.000000 203.086639 186.000000
186.000000 338.000000 173.098419
While the expected for [0;2] is 100.000000 and for [1;2]=383.000000, but when print the currently value of traj it's correct.
How can I fix this problem?
The code is all here:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <math.h>
#include <stdbool.h>
#include <ctype.h>
#define ARRAYSIZE(x) (sizeof(x)/sizeof(*(x)))
int csv_parse ( char *line, int size )
{
char *p;
char *dp;
int inquote;
int na;
int nTo_comma;
char prevc = ',';
char *list[256];
dp = NULL;
// inquote = 0;
na = 0;
prevc = ';';
nTo_comma=0;
for ( p = line; *p != '\n'; p++ )
{
nTo_comma++;
list[nTo_comma] = p;
if(*p == prevc)
{
printf("%s\t", list);
return na;
}
}
printf("\n");
return na;
}
double calculateMLpa(const char *Xn_val[], char *traj[], float value[], double alphaxixj, double tauxi, int sz, int dim) {
double mlx = 0;
double v;
double alphaxi;
char *state;
int i;
int p;
int j;
int k;
// int sz = sizeof(Xn_val) / sizeof(int);
// int dim = sizeof(traj) / sizeof(char);
double trns[sz][sz];
double m[sz];
char *trat="-";
// m[xi] values: the number of transitions leaving the state xi
printf("%d %d \n",sz,dim);
int cont=0;
for (i = 0; i <= sz; i++) {
m[i] = 0.0;
for (j = 0; j <= sz; j++) {
v = 0.0;
int newlength = strlen(Xn_val[i])+strlen(trat)+strlen(Xn_val[j])+1;
state = malloc(sizeof(char)*newlength);
if(state != NULL){
state[0] = '\0';
strcat(state,Xn_val[i]);
strcat(state,trat);
strcat(state,Xn_val[j]);
printf("%s ",state);
}else {
printf(stderr,"malloc failed!\n");
}
// for (k=0; k<=dim;++k){
if (traj[cont] != NULL ){
if (strcmp(traj[cont],state)==0){
v = value[cont+1];
printf("%f \n",v);
}
}
trns[i][j] = v;
printf("%f - \n",trns[i][j]);
if (strcmp(Xn_val[i],Xn_val[j])!=0)
m[i] = m[i] + v;
cont++;
}
}
for (i=0;i<=sz;++i){
for(j=0;j<=sz;++j){
printf("%f ",trns[i][j]);
}
printf("\n");
}
for (p=0;p<=sz;++p){
printf("%f - \n",m[p]);
}
printf("%f %f\n",trns[0][1],trns[0][2]);
alphaxi = alphaxixj * (((double) sz) - 1.0);
alphaxi = alphaxixj;
printf("%d ",sz);
for (i = 0; i <= sz; i++) {
for (j = 0; j <= sz; j++) {
// xi!=xj
if (strcmp(Xn_val[i], Xn_val[j])!=0) {
mlx = mlx + lgamma(alphaxixj + trns[i][j]) - lgamma(alphaxixj);
}
// xi
else {
mlx = mlx + lgamma(alphaxi) - lgamma(alphaxi + m[i]);
mlx = mlx + lgamma(alphaxi + m[i] + 1.0)+ (alphaxi + 1.0) * log(tauxi);
mlx = mlx - lgamma(alphaxi + 1.0)- (alphaxi + m[i] + 1.0) * log(tauxi + trns[i][j]);
}
}
}
return (mlx);
}
#define MAXFLDS 200 /* maximum possible number of fields */
#define MAXFLDSIZE 32 /* longest possible field + 1 = 31 byte field */
void parse(char *record, char *delim, char arr[][MAXFLDSIZE], int *fldcnt) {
char*p = strtok(record, delim);
int fld = 0;
while (p) {
strcpy(arr[fld], p);
fld++;
p = strtok('\0', delim);
}
*fldcnt = fld;
}
void main() {
printf("inizio\n");
FILE *pf;
int N=20;
bool first=true;
const char *a[]={"0","1","2"};
char *traject[]={"0-0","0-1","0-2","1-0","1-1","1-2","2-0","2-1","2-2"};
double bs=0;
char *trat="-";
pf=fopen("//home//user//prova.csv","r");
float array[10][10];
float *t;
char *str= "hello";
char *state;
t = (float *)malloc(N * sizeof(float));
int f=0;
if (pf)
{
size_t i, j, k;
char buffer[BUFSIZ], *ptr;
/*
* Read each line from the file.
*/
for ( i = 0; fgets(buffer, sizeof buffer, pf); ++i )
{
/*
* Parse the comma-separated values from each line into 'array'.
*/
for ( j = 0, ptr = buffer; j < ARRAYSIZE(*array); ++j, ++ptr )
{
array[i][j] = strtof(ptr, &ptr);
}
}
fclose(pf);}
else /* fopen() returned NULL */
{
perror(pf);
}
for(f=0; f<10; ++f){
if(f==0){}
else if(f==1 && array[f][8]==0)
array[f][8]=123.8149353;
t[f]=array[f][8];
//printf("%f \n",t[f]);
}
for (f=0;f<10; ++f){
printf("%f - ",t[f]);
}
//printf("%s, %s, %s \n",a[0],a[1],a[2]);
printf("start\n");
int sz = sizeof(a) / sizeof(char);
int dim = sizeof(traject) / sizeof(char);
printf("%d , %d \n",sz,dim);
bs=calculateMLpa(a,traject,t,1.0,0.1,sz,dim);
printf("done \n");
printf("%f ",bs);
}
EDIT
I try to pass array size
sz=sizeof(a)/sizeof(char)
dim = sizeof(traject) / sizeof(char);
but their value is 24 and 72 respectively, and the execution stops at 0-2 value 100.000000
Arrays passed to functions decay to pointers to the start of the array. So
#define ARRAYSIZE(x) (sizeof(x)/sizeof(*(x)))
Will not return anything meaningful when checking for its size in that case
To fix, pass the Array size as an additional Argument.
One major problem is that when you pass arrays to functions, they decay to pointers, and the sizeof trick you use to get the array size will not work.
You need to pass the actual array sizes as arguments.
I want to convert array of bytes bytes1 (little endian), 2 by 2, into an array of short integers, and vice versa . I expect to get final array bytes2, equal to initial array bytes1. I have code like this:
int i = 0;
int j = 0;
char *bytes1;
char *bytes2;
short *short_ints;
bytes1 = (char *) malloc( 2048 );
bytes2 = (char *) malloc( 2048 );
short_ints = (short *) malloc( 2048 );
for ( i=0; i<2048; i+=2)
{
short_ints[j] = bytes1[i+1] << 8 | bytes1[i] ;
j++;
}
j = 0;
for ( i=0; i<2048; i+=2)
{
bytes2[i+1] = (short_ints[j] >> 8) & 0xff;
bytes2[i] = (short_ints[j]) ;
j++;
}
j = 0;
Now, can someone tell me why I haven't got bytes2 array, completely the same as bytes1 ? And how to do this properly?
Suggest 2 functions. Do all combining and extraction as unsigned to remove issues with the sign bit in short and maybe char.
The sign bit is OP's code biggest problem. short_ints[j] = bytes1[i+1] << 8 | bytes1[i] ; likely does a sign extend with bytes1[i] conversion to int.
Also (short_ints[j] >> 8) does a sign extend.
// Combine every 2 char (little endian) into 1 short
void charpair_short(short *dest, const char *src, size_t n) {
const unsigned char *usrc = (const unsigned char *) src;
unsigned short *udest = (unsigned short *) dest;
if (n % 2) Handle_OddError();
n /= 2;
while (n-- > 0) {
*udest = *usrc++;
*udest += *usrc++ * 256u;
udest++;
}
}
// Break every short into 2 char (little endian)
void short_charpair(char *dest, const short *src, size_t n) {
const unsigned short *usrc = (const unsigned short *) src;
unsigned char *udest = (unsigned char *) dest;
if (n % 2) Handle_OddError();
n /= 2;
while (n-- > 0) {
*udest++ = (unsigned char) (*usrc);
*udest++ = (unsigned char) (*usrc / 256u);
usrc++;
}
}
int main(void) {
size_t n = 2048; // size_t rather than int has advantages for array index
// Suggest code style: type *var = malloc(sizeof(*var) * N);
// No casting of return
// Use sizeof() with target pointer name rather than target type.
char *bytes1 = malloc(sizeof * bytes1 * n);
Initialize(bytes, n); //TBD code for OP-best to not work w/uninitialized data
// short_ints = (short *) malloc( 2048 );
// This is weak as `sizeof(short)!=2` is possible
short *short_ints = malloc(sizeof * short_ints * n/2);
charpair_short(short_ints, bytes1, n);
char *bytes2 = malloc(sizeof * bytes2 * n);
short_charpair(bytes2, short_ints, n);
compare(bytes1, bytes2, n); // TBD code for OP
// epilogue
free(bytes1);
free(short_ints);
free(bytes2);
return 0;
}
Avoided the union approach as that is platform endian dependent.
Here's a program that demonstrates that you are experiencing the problem associated with bit-shifting signed integral values.
#include <stdio.h>
#include <stdlib.h>
void testCore(char bytes1[],
char bytes2[],
short short_ints[],
int size)
{
int i = 0;
int j = 0;
for ( i=0; i<size; i+=2)
{
short_ints[j] = bytes1[i+1] << 8 | bytes1[i] ;
j++;
}
j = 0;
for ( i=0; i<size; i+=2)
{
bytes2[i+1] = (short_ints[j] >> 8) & 0xff;
bytes2[i] = (short_ints[j]) ;
j++;
}
for ( i=0; i<size; ++i)
{
if ( bytes1[i] != bytes2[i] )
{
printf("%d-th element is not equal\n", i);
}
}
}
void test1()
{
char bytes1[4] = {-10, 0, 0, 0};
char bytes2[4];
short short_ints[2];
testCore(bytes1, bytes2, short_ints, 4);
}
void test2()
{
char bytes1[4] = {10, 0, 0, 0};
char bytes2[4];
short short_ints[2];
testCore(bytes1, bytes2, short_ints, 4);
}
int main()
{
printf("Calling test1 ...\n");
test1();
printf("Done\n");
printf("Calling test2 ...\n");
test2();
printf("Done\n");
return 0;
}
Output of the program:
Calling test1 ...
1-th element is not equal
Done
Calling test2 ...
Done
Udate
Here's a version of testCore that works for me:
void testCore(char bytes1[],
char bytes2[],
short short_ints[],
int size)
{
int i = 0;
int j = 0;
unsigned char c1;
unsigned char c2;
unsigned short s;
for ( i=0; i<size; i+=2)
{
c1 = bytes1[i];
c2 = bytes1[i+1];
short_ints[j] = (c2 << 8) | c1;
j++;
}
j = 0;
for ( i=0; i<size; i+=2)
{
s = short_ints[j];
s = s >> 8;
bytes2[i+1] = s;
bytes2[i] = short_ints[j] & 0xff;
j++;
}
for ( i=0; i<size; ++i)
{
if ( bytes1[i] != bytes2[i] )
{
printf("%d-th element is not equal\n", i);
}
}
}
It is tested with:
char bytes1[4] = {-10, 0, 25, -4};
and
char bytes1[4] = {10, -2, 25, 4};
Well, what you need is a UNION:
#include <stdio.h>
#include <string.h>
union MyShort {
short short_value;
struct {
char byte1;
char byte2;
};
};
int main(int argc, const char * argv[])
{
char a[4]="abcd";
char b[4]="1234";
short c[5]; c[4]=0;
union MyShort d;
for (int i = 0; i<4; i++) {
d.byte1 = a[i];
d.byte2 = b[i];
c[i] = d.short_value;
}//next i
printf("%s\n", (char*)c);
return 0;
}
the result should be a1b2c3d4.
Had a code which was used to test out DDR and it used to work fine with GCC-4.2.4 but after we moved to GCC-4.7.0 bit flips are observed after the below code is executed ; is this due to a bug in the code or I need to handle something for the new compiler ; given below is the code snippet :
int test_bitflip_comparison(unsigned long volatile *bufa, unsigned long volatile *bufb, size_t count) {
unsigned long volatile *p1 = bufa;
unsigned long volatile *p2 = bufb;
unsigned int j, k;
unsigned long q;
size_t i;
char *msgBuf;
for (k = 0; k < UL_LEN; k++) {
q = ONE << k;
for (j = 0; j < 8; j++) {
q = ~q;
p1 = (unsigned long volatile *) bufa;
p2 = (unsigned long volatile *) bufb;
for (i = 0; i < count; i++) {
*p1++ = *p2++ = (i % 2) == 0 ? q : ~q;
}
if (compare_regions(bufa, bufb, count)) {
return -1;
}
}
if(!(k % 10))
puts1(".\0");
}
return 0;
}
int compare_regions(unsigned long volatile *bufa, unsigned long volatile *bufb, size_t count) {
int r = 0;
size_t i;
unsigned long physaddr;
unsigned long volatile *p1 = bufa;
unsigned long volatile *p2 = bufb;
for (i = 0; i < count; i++, p1++, p2++) {
if (*p1 != *p2) {
if (use_phys) {
physaddr = physaddrbase + (i * sizeof(unsigned long));
} else {
}
r = -1;
}
}
return r;
}