How to improve the performance of this code?

How to improve the performance of this code? - c

I'm writing a program which operates with arrays of Big Integer Numbers and does basic operations.
I'm worried about the performance of my code. It executes in 47 seconds when compiled with:
gcc -Ofast -funroll-all-loops -ftree-vectorize -fopt-info-vec -g -lm $1 -o ${2}.Ofast
I thought the key to solving my problem is changing the data type of my arrays to unsigned long long, but when I do the result is different.
Any suggestion is welcome, even if it changes my whole program or data types as long as it doesn't change the outcome of my program.
Also, I have been able to visualize that my performance problem is mainly in the LongNumAddition and LongNumAddDigit functions, how can I improve my code? Thanks for your attention.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>
// Variable used to generate pseudo-random numbers
unsigned int seed;
unsigned int temp;
unsigned int var1 = 214013;
unsigned int var2 = 2531011;
#define val13 13
#define ten 10
// Function to generate pseudo-random numbers
inline int myRandom() {
temp = var1*seed;
seed = temp + var2;
return (seed>>val13);
}
void LongNumInit( uint8_t *L, size_t N )
{
for(size_t i = 0; i < N; ++i){
L[i] = myRandom() % 10;
}
}
void LongNumPrint( uint8_t *L, size_t N, uint8_t *Name )
{
printf("%s:", Name);
for ( size_t i=N; i>0;--i )
{
printf("%d", L[i-1]);
}
printf("\n");
}
void LongNumSet( uint8_t *L, size_t N){
memset(L,0,N);
}
void LongNumCopy( const uint8_t *Vin, uint8_t *Vout, size_t N )
{
memcpy(Vout,Vin,N);
}
uint8_t LongNumAddition( uint8_t *Vin1, uint8_t *Vin2,uint8_t *Vout, size_t N)
{
uint8_t CARRY = 0;
for ( size_t i=0; i< N; ++i )
{
Vout[i] = Vin1[i] + Vin2[i] + CARRY;
CARRY = (Vout[i] > 9);
if(CARRY){
Vout[i] -= ten;
}
}
return CARRY;
}
uint8_t LongNumAddDigit( uint8_t *V, uint8_t digit, size_t N )
{
size_t i=0;
V[0] += digit;
if ( V[0] < ten){
return 0;
}
V[0] -=ten;
// add carry, maybe iteratively for all digits
while ((++i < N) && (V[i] >= 9))
{
V[i] = 0;
}
if((i != N) && (V[i] < 9)){
V[i]++;
return 0;
}
return 1;
}
uint8_t LongNumHorizAdd( uint8_t *Vin, uint8_t *Vout, size_t N )
{
uint8_t CARRY = 0;
LongNumSet ( Vout, N);
for ( size_t i=0; i< N; ++i )
{
LongNumAddDigit ( Vout, Vin[i], N );
}
return 0; // CARRY can never be set
}
uint8_t LongNumConstMult( uint8_t *V, size_t N, uint8_t digit )
{
uint8_t CARRY = 0;
for ( size_t i=0; i< N; ++i )
{
V[i] = V[i] * digit + CARRY;
CARRY = ((u_int32_t)V[i] * (u_int32_t)0xCCCD) >> 19;
V[i] -= (CARRY << 3) + (CARRY << 1);
}
return CARRY; // may be from 0 to 9
}
void LongNumMultiply( uint8_t *Vin1, uint8_t *Vin2, uint8_t *VoutH, uint8_t *VoutL, size_t N)
{
// Create Temporal Long Integer with double size
uint8_t *TEMP = (uint8_t*) malloc(2*N*sizeof(uint8_t));
uint8_t *RES = (uint8_t*) malloc( 2*N*sizeof(uint8_t));
LongNumSet ( RES, 2*N); // Set RES to 0
for ( size_t i=0; i<N; ++i )
{
LongNumSet ( TEMP, 2*N); // Set TEMP to 0
LongNumCopy ( Vin1, TEMP+i, N ); // Copy Vin1 -> TEMP, with offset i
LongNumConstMult( TEMP, 2*N, Vin2[i] ); // TEMP * Vin2[i] -> TEMP
LongNumAddition ( TEMP, RES, RES, 2*N ); // TEMP + RES -> RES
}
// Result goes to VoutH-VoutL
LongNumCopy ( RES, VoutL, N ); // Copy RES -> VoutL
LongNumCopy ( RES+N, VoutH, N ); // Copy RES+N -> VoutH
}
int main (int argc, char **argv)
{
int i, sum1, sum2, sum3, N=10000, Rep=50;
seed = 12345;
// obtain parameters at run time
if (argc>1) { N = atoi(argv[1]); }
if (argc>2) { Rep = atoi(argv[2]); }
printf("Challenge #3: Vector size is %d. Repeat %d times\n", N, Rep);
// Create Long Nums
unsigned char *V1= (unsigned char*) malloc( N*sizeof(unsigned char) );
unsigned char *V2= (unsigned char*) malloc( N*sizeof(unsigned char) );
unsigned char *V3= (unsigned char*) malloc( N*sizeof(unsigned char) );
unsigned char *V4= (unsigned char*) malloc( N*sizeof(unsigned char) );
LongNumInit ( V1, N ); LongNumInit ( V2, N ); LongNumInit ( V3, N );
// Repeat
for (i=0; i<Rep; i++)
{
LongNumAddition ( V1, V2, V4, N );
LongNumMultiply ( V3, V4, V2, V1, N );
LongNumHorizAdd ( V1, V2, N );
LongNumAddDigit ( V3, V2[0], N );
}
// Print last 32 digits of Long Numbers
LongNumPrint( V1, 32, "V1" );
LongNumPrint( V2, 32, "V2" );
LongNumPrint( V3, 32, "V3" );
LongNumPrint( V4, 32, "V4" );
free(V1); free(V2); free(V3); free(V4);
return 0;
}

Use a profiler - strongly recommend kcachegrind
https://kcachegrind.github.io/html/Usage.html
first, install valgrind and kcachegrind, then compile your binary using '-g' using gcc, and then run your binary using
valgrind --tool=callgrind ./yourbinary parameters ...
then, run kcachgrind in the current directory, switch view to display line-by-line timing information, like this
https://kcachegrind.github.io/html/Shot4Large.html
this tells you which lines of your code took most of the runtime.

Related

How to vectorize and optimize this functions in C?

I have this functions, the result is correct but the compiler don't vectorize this.
How can I achive that the compiler vectorize this and how can I optimize this codes?
void LongNumSet( char *L, unsigned N, char digit )
{
for (int i = 0; i < N; ++i){
L[i] = digit;
}
}
void LongNumCopy( char *Vin, char *Vout, unsigned N )
{
for ( int i=0; i< N; ++i )
{
Vout[i] = Vin[i];
}
}
char LongNumAddition( char *__restrict Vin1, char * __restrict Vin2, char * __restrict Vout, unsigned N )
{
char CARRY = 0,R,aux;
Vin1 = (char*)__builtin_assume_aligned (Vin1, 1);
Vin2 = (char*)__builtin_assume_aligned (Vin2, 1);
for ( int i=0; i< N; ++i )
{
char R = Vin1[i] + Vin2[i] + CARRY;
aux = R <= 9;
Vout[i] = (aux) ? R:R-ten;
CARRY = (aux) ? 0:1;
}
return CARRY;
}
char LongNumAddDigit( char *V, char digit, unsigned N )
{
int i=0;
char R = V[0] + digit;
if ( R < ten){
V[0] = R;
return 0;
}
V[0] = R-ten;
// add carry, maybe iteratively for all digits
char CARRY = 1;
i = 1;
while ( CARRY && i < N )
{
if ( V[i] < 9 )
{
V[i]++;
CARRY = 0;
}
else
{
V[i] = 0;
i++; // CARRY remains set to 1
}
}
return CARRY;
}
I use the comand gcc -O3 -ffast-math -msse -funroll-all-loops -ftree-vectorizer-verbose=25 -lm -g $1 -o ${2}.O3 and I executate the program in 55 s.
This is all of code:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
// Variable used to generate pseudo-random numbers
unsigned int seed;
unsigned int temp;
unsigned int var1 = 214013;
unsigned int var2 = 2531011;
#define val13 13
#define ten 10
// Function to generate pseudo-random numbers
inline int myRandom() {
temp = var1*seed;
seed = temp + var2;
return (seed>>val13);
}
void LongNumInit( char *L, unsigned N )
{
for ( int i=0; i<N;++i )
{
L[i] = myRandom() % ten; // digito decimal
}
}
void LongNumPrint( char *L, unsigned N, char *Name )
{
printf("%s:", Name);
for ( int i=N; i>0; i-- )
{
printf("%d", L[i-1]);
}
printf("\n");
}
void LongNumSet( char *L, unsigned N, char digit )
{
for (int i = 0; i < N; ++i){
L[i] = digit;
}
}
void LongNumCopy( char *Vin, char *Vout, unsigned N )
{
for ( int i=0; i< N; ++i )
{
Vout[i] = Vin[i];
}
}
char LongNumAddition( char *__restrict Vin1, char * __restrict Vin2, char * __restrict Vout, unsigned N )
{
char CARRY = 0,R,aux;
Vin1 = (char*)__builtin_assume_aligned (Vin1, 1);
Vin2 = (char*)__builtin_assume_aligned (Vin2, 1);
for ( int i=0; i< N; ++i )
{
char R = Vin1[i] + Vin2[i] + CARRY;
aux = R <= 9;
Vout[i] = (aux) ? R:R-ten;
CARRY = (aux) ? 0:1;
}
return CARRY;
}
char LongNumAddDigit( char *V, char digit, unsigned N )
{
int i=0;
char R = V[0] + digit;
if ( R < ten){
V[0] = R;
return 0;
}
V[0] = R-ten;
// add carry, maybe iteratively for all digits
char CARRY = 1;
i = 1;
while ( CARRY && i < N )
{
if ( V[i] < 9 )
{
V[i]++;
CARRY = 0;
}
else
{
V[i] = 0;
i++; // CARRY remains set to 1
}
}
return CARRY;
}
char LongNumHorizAdd( char *Vin, char *Vout, unsigned N )
{
char CARRY = 0;
LongNumSet ( Vout, N, 0 );
for ( int i=0; i< N; ++i )
{
LongNumAddDigit ( Vout, Vin[i], N );
}
return 0; // CARRY can never be set
}
char LongNumConstMult( char *V, unsigned N, char digit )
{
char CARRY = 0;
char ja = 0;
for ( int i=0; i< N; ++i )
{
char aux = V[i] * digit;
char R = aux + CARRY;
CARRY = ((u_int32_t)R * (u_int32_t)0xCCCD) >> 19;
ja = (CARRY << 3) + 2*CARRY;
R -= ja;
V[i] = R;
}
return CARRY; // may be from 0 to 9
}
void LongNumMultiply( char *Vin1, char *Vin2, char *VoutH, char *VoutL, unsigned N )
{
// Create Temporal Long Integer with double size
unsigned char *TEMP = (unsigned char*) calloc(2*N,sizeof(unsigned char));
unsigned char *RES = (unsigned char*) calloc( 2*N,sizeof(unsigned char) );
LongNumSet ( RES, 2*N, 0 ); // Set RES to 0
for ( int i=0; i<N; ++i )
{
LongNumSet ( TEMP, 2*N, 0 ); // Set TEMP to 0
LongNumCopy ( Vin1, TEMP+i, N ); // Copy Vin1 -> TEMP, with offset i
LongNumConstMult( TEMP, 2*N, Vin2[i] ); // TEMP * Vin2[i] -> TEMP
LongNumAddition ( TEMP, RES, RES, 2*N ); // TEMP + RES -> RES
}
// Result goes to VoutH-VoutL
LongNumCopy ( RES, VoutL, N ); // Copy RES -> VoutL
LongNumCopy ( RES+N, VoutH, N ); // Copy RES+N -> VoutH
}
int main (int argc, char **argv)
{
int i, sum1, sum2, sum3, N=10000, Rep=50;
seed = 12345;
// obtain parameters at run time
if (argc>1) { N = atoi(argv[1]); }
if (argc>2) { Rep = atoi(argv[2]); }
printf("Challenge #3: Vector size is %d. Repeat %d times\n", N, Rep);
// Create Long Nums
unsigned char *V1= (unsigned char*) malloc( N*sizeof(unsigned char) );
unsigned char *V2= (unsigned char*) malloc( N*sizeof(unsigned char) );
unsigned char *V3= (unsigned char*) malloc( N*sizeof(unsigned char) );
unsigned char *V4= (unsigned char*) malloc( N*sizeof(unsigned char) );
LongNumInit ( V1, N ); LongNumInit ( V2, N ); LongNumInit ( V3, N );
// Repeat
for (i=0; i<Rep; i++)
{
LongNumAddition ( V1, V2, V4, N );
LongNumMultiply ( V3, V4, V2, V1, N );
LongNumHorizAdd ( V1, V2, N );
LongNumAddDigit ( V3, V2[0], N );
}
// Print last 32 digits of Long Numbers
LongNumPrint( V1, 32, "V1" );
LongNumPrint( V2, 32, "V2" );
LongNumPrint( V3, 32, "V3" );
LongNumPrint( V4, 32, "V4" );
free(V1); free(V2); free(V3); free(V4);
return 0;
}

Acording to your usage, instead of LongNumSet you could create and use LongNumClear(not much improvement).
Below are some other potential rewrites of some of your functions. I think you should notice some improvements. For me it's around 44%. I also changed the type from char to unsigned.
#include <string.h>
void LongNumClear(uint8_t *L, size_t N) {
memset (L, 0, N);
}
void LongNumCopy(const uint8_t *Vin, uint8_t *Vout, size_t N) {
memcpy(Vout, Vin, N);
}
uint8_t LongNumAddition(uint8_t * Vin1, uint8_t * Vin2, uint8_t * Vout, size_t N) {
uint8_t carry = 0;
for (size_t i=0; i < N; ++i) {
Vout[i] = Vin1[i] + Vin2[i] + carry;
carry = (Vout[i] > 9);
if (carry) {
Vout[i] -= ten;
}
}
return carry;
}
uint8_t LongNumAddDigit(uint8_t *V, uint8_t digit, size_t N) {
size_t i=0;
V[0] += digit;
if (V[0] < ten) {
return 0;
}
V[0] -= ten;
while ((++i < N) && (V[i] >= 9)) {
V[i] = 0;
}
if ((i != N) && (V[i] < 9)) {
V[i]++;
return 0;
}
return 1;
}
uint8_t LongNumConstMult(uint8_t *V, size_t N, uint8_t digit) {
uint8_t carry = 0;
for (size_t i = 0; i < N; ++i ) {
V[i] = V[i] * digit + carry;
carry = ((u_int32_t)V[i] * (u_int32_t)0xCCCD) >> 19; // divide by 10
V[i] -= ((carry << 3) + (carry << 1));
}
return carry;
}

Intel Intrinsics code optimization

So i'm trying to multiply a constant with short int a[101] with intel intrinsics. I have done it with addition but i can't seem to figure why it wont work with multiplication. Also before we used ints of 32 bits and now we use 16 bit short so we can have double as many values in the intrinsics to fill the 128 bit as far as i understand?
naive example of what im trying to do:
int main(int argc, char **argv){
short int a[101];
int len = sizeof(a)/sizeof(short);
/*Populating array a with values 1 to 101*/
mult(len, a);
return 0;
}
int mult(int len, short int *a){
int result = 0;
for(int i=0; i<len; i++){
result += a[i]*20;
}
return result;
}
And my code trying to do the same in intrinsics
/*Same main as before with a short int a[101] containing values 1 to 101*/
int SIMD(int len, short int *a){
int res;
int val[4];
/*Setting constant value to mulitply with*/
__m128i sum = _mm_set1_epi16(20);
__m128i s = _mm_setzero_si128( );
for(int i=0; i<len/4*4; i += 4){
__m128i vec = _mm_loadu_si128((__m128i *)(a+i));
s += _mm_mul_epu32(vec,sum);
}
_mm_storeu_si128((__m128i*) val, s);
res += val[0] + val[1] + val[2] + val[3];
/*Haldeling tail*/
for(int i=len/4*4; i<len; i++){
res += a[i];
}
return res;
}
So i do get a number out as result, but the number does not match the naive method, i have tried other intrinsics and changing numbers to see if it makes any noticable difference but nothing comes close to the output i expect. The computation time is almost the same as the naive at the moment aswell.

There are 8 short in one __m128i. So:
for(int i=0; i<len/4*4; i += 4)
should be
for(int i=0; i<len/8*8; i += 8)`
and:
res += val[0] + val[1] + val[2] + val[3];
should be:
res += val[0] + val[1] + val[2] + val[3] + val[4] + val[5] + val[6] + val[7];
and:
for(int i=len/4*4; i<len; i++)
should be:
for(int i=len/8*8; i<len; i++)
In:
s += _mm_mul_epu32(vec,sum);
_mm_mul_epu32 operates on 32-bit elements. It should be:
s += _mm_mullo_epi16(vec, sum);
The object res is not initialized; it should be:
int res = 0;
Here is working code:
#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>
// Number of elements in an array.
#define NumberOf(x) (sizeof (x) / sizeof *(x))
// Compute the result with scalar arithmetic.
static int mult(int len, short int *a)
{
int result = 0;
for (size_t i=0; i<len; i++)
{
result += a[i]*20;
}
return result;
}
// Compute the result with SIMD arithmetic.
static int SIMD(int len, short int *a)
{
// Initialize the multiplier and the sum.
__m128i multiplier = _mm_set1_epi16(20);
__m128i s = _mm_setzero_si128( );
// Process blocks of 8 short.
for (int i=0; i<len/8*8; i += 8)
{
__m128i vec = _mm_loadu_si128((__m128i *)(a+i));
// Multtiply by multiplier and add to sum.
s = _mm_add_epi16(s, _mm_mullo_epi16(vec, multiplier));
}
// Store the sum so far so its individual elements can be manipulated.
short val[8];
_mm_storeu_si128((__m128i*) val, s);
// Add the individual elements.
int res = 0;
for (size_t i = 0; i < 8; ++i)
res += val[i];
// Add the elements in the tail.
for (size_t i = len/8*8; i < len; ++i)
{
res += a[i];
}
return res;
}
int main(int argc, char **argv)
{
short int a[96];
int len = NumberOf(a);
// Initiailize a.
for (size_t i = 0; i < len; ++i)
a[i] = i+1;
printf("sum by scalar arithmetic is %d.\n", mult(len, a));
printf("sum by SIMD arithmetic is %d.\n", SIMD(len, a));
return 0;
}

C: qsort doesn't seem to work with unsigned long

Can anyone please tell me what is wrong with the following example? I took it from here and replaced int by unsigned long. I also changed the cmpfunc to properly handle unsigned long.
#include <stdio.h>
#include <stdlib.h>
unsigned long values[] = { 88, 56, 100, 2, 25 };
int cmpfunc (const void * a, const void * b)
{
if(*(unsigned long*)a - *(unsigned long*)b < 0){
return -1;
}
if(*(unsigned long*)a - *(unsigned long*)b > 0){
return 1;
}
if(*(unsigned long*)a - *(unsigned long*)b == 0){
return 0;
}
}
int main()
{
int n;
printf("Before sorting the list is: \n");
for( n = 0 ; n < 5; n++ )
{
printf("%lu ", values[n]);
}
qsort(values, 5, sizeof(unsigned long), cmpfunc);
printf("\nAfter sorting the list is: \n");
for( n = 0 ; n < 5; n++ )
{
printf("%lu ", values[n]);
}
return(0);
}
Here is the output I got:
Before sorting the list is:
88 56 100 2 25
After sorting the list is:
25 2 100 56 88

You comparison function is incorrect. A subtraction of unsigned values can wrap the value giving an incorrect result.
The function should only compare the values:
int compare( const void* a , const void* b )
{
const unsigned long ai = *( const unsigned long* )a;
const unsigned long bi = *( const unsigned long* )b;
if( ai < bi )
{
return -1;
}
else if( ai > bi )
{
return 1;
}
else
{
return 0;
}
}

how to print memory bits in c

I'm learning how numbers are represented in memory. I want to know how to print the actual representation (binary or hexadecimal) in memory of some int and float variables.
I'd like to see what happens with that numbers when adding or subtracting it causes overflow, for example.
How can I access memory and print it?

You would need to assign a pointer to the variable in question to a char *, and treat it as an array of bytes of length sizeof(variable). Then you can print each byte in hex using the %X format specifier to printf.
You can define a function like this:
void print_bytes(void *ptr, int size)
{
unsigned char *p = ptr;
int i;
for (i=0; i<size; i++) {
printf("%02hhX ", p[i]);
}
printf("\n");
}
And call it like this:
int x = 123456;
double y = 3.14;
print_bytes(&x, sizeof(x));
print_bytes(&y, sizeof(y));

... to print the actual representation (binary ...
To convert any variable/object to a string that encodes the binary form uses a helper function that converts memory into a "binary" string. This method also handles function pointers. Uses C99 or later.
#include <stdio.h>
#include <assert.h>
#include <limits.h>
// .... compound literal .......
#define VAR_TO_STR_BIN(x) obj_to_bin((char [sizeof(x)*CHAR_BIT + 1]){""}, &(x), sizeof (x))
char *obj_to_bin(char *dest, void *object, size_t osize) {
const unsigned char *p = (const unsigned char *) object;
p += osize;
char *s = dest;
while (osize-- > 0) {
p--;
unsigned i = CHAR_BIT;
while (i-- > 0) {
*s++ = ((*p >> i) & 1) + '0';
}
}
*s = '\0';
return dest;
}
int main(void) {
int i = 42;
double d = 3.1415926535897932384626433832795;
printf("Sample\ndouble pi:%s\nint 42:%s\n", VAR_TO_STR_BIN(d), VAR_TO_STR_BIN(i) );
return 0;
}
Output (Note: depending in endian-ness, results may vary)
Sample
double pi:0100000000001001001000011111101101010100010001000010110100011000
int 42:00000000000000000000000000101010
This approach is easy to adapt to hexadecimal form.

Let's say you have a int variable called memory. Make sure you see how many bits it is; for many processors an int is 32 bits as well as a memory address. So you need to loop through each bit, like this:
unsigned int memory = 1234;
for (int i = 0; i < 32; i++)
{
printf("%d ", memory >> i & 1);
}
This simple method ORs each bit with 1 and shifts each bit by 1.

#include <stdio.h>
#include <stdlib.h>
void print_bits ( void* buf, size_t size_in_bytes )
{
char* ptr = (char*)buf;
for (size_t i = 0; i < size_in_bytes; i++) {
for (short j = 7; j >= 0; j--) {
printf("%d", (ptr[i] >> j) & 1);
}
printf(" ");
}
printf("\n");
}
int main ( void )
{
size_t n;
scanf("%d", &n);
print_bits(&n, sizeof(n));
return 0;
}
This prints bits of the specified object (n here) with the specified size (in bytes).

#dbush, #Anton, I mixed your codes. It's okay?
#include <stdio.h>
#include <stdlib.h>
void print_bytes( void *ptr, size_t size ) ;
int main( void )
{
int x = 123456 ;
double y = 3.14 ;
print_bytes( &x, sizeof(x) ) ;
print_bytes( &y, sizeof(y) ) ;
return 0 ;
}
void print_bytes( void *ptr, size_t size )
{
//char *buf = (char*) ptr;
unsigned char *p = ptr ;
for( size_t i = 0; i < size; i++ )
{
printf( "%02hhX ", p[i] ) ;
}
printf( "\n" ) ;
for( size_t i = 0; i < size; i++ )
{
for( short j = 7; j >= 0; j-- )
{
printf( "%d", ( p[i] >> j ) & 1 ) ;
}
printf(" ");
}
printf("\n");
}

Call print_bits(memory address of variable, size of variable in byte).
void print_bits(void *ptr, int size) //ptr = memory address of variable, size = size of variable in byte
{
long long *ch = ptr;
int size_bits = size * 8;
for(int i = size_bits-1; i>=0; i--){
printf("%lld", *ch >> i & 1) ;
}
}
It has been tested successfully, working with any variable of less than or equal to 64 bits. This will probably work correctly with variables with other sizes (Not Tested).
Calling:
double d = -7.92282286274e+28;
print_bits(&d, sizeof(d));
Output:
1100010111110000000000000000000011100000000000000000000100010111

c; converting 2 bytes to short and vice versa

I want to convert array of bytes bytes1 (little endian), 2 by 2, into an array of short integers, and vice versa . I expect to get final array bytes2, equal to initial array bytes1. I have code like this:
int i = 0;
int j = 0;
char *bytes1;
char *bytes2;
short *short_ints;
bytes1 = (char *) malloc( 2048 );
bytes2 = (char *) malloc( 2048 );
short_ints = (short *) malloc( 2048 );
for ( i=0; i<2048; i+=2)
{
short_ints[j] = bytes1[i+1] << 8 | bytes1[i] ;
j++;
}
j = 0;
for ( i=0; i<2048; i+=2)
{
bytes2[i+1] = (short_ints[j] >> 8) & 0xff;
bytes2[i] = (short_ints[j]) ;
j++;
}
j = 0;
Now, can someone tell me why I haven't got bytes2 array, completely the same as bytes1 ? And how to do this properly?

Suggest 2 functions. Do all combining and extraction as unsigned to remove issues with the sign bit in short and maybe char.
The sign bit is OP's code biggest problem. short_ints[j] = bytes1[i+1] << 8 | bytes1[i] ; likely does a sign extend with bytes1[i] conversion to int.
Also (short_ints[j] >> 8) does a sign extend.
// Combine every 2 char (little endian) into 1 short
void charpair_short(short *dest, const char *src, size_t n) {
const unsigned char *usrc = (const unsigned char *) src;
unsigned short *udest = (unsigned short *) dest;
if (n % 2) Handle_OddError();
n /= 2;
while (n-- > 0) {
*udest = *usrc++;
*udest += *usrc++ * 256u;
udest++;
}
}
// Break every short into 2 char (little endian)
void short_charpair(char *dest, const short *src, size_t n) {
const unsigned short *usrc = (const unsigned short *) src;
unsigned char *udest = (unsigned char *) dest;
if (n % 2) Handle_OddError();
n /= 2;
while (n-- > 0) {
*udest++ = (unsigned char) (*usrc);
*udest++ = (unsigned char) (*usrc / 256u);
usrc++;
}
}
int main(void) {
size_t n = 2048; // size_t rather than int has advantages for array index
// Suggest code style: type *var = malloc(sizeof(*var) * N);
// No casting of return
// Use sizeof() with target pointer name rather than target type.
char *bytes1 = malloc(sizeof * bytes1 * n);
Initialize(bytes, n); //TBD code for OP-best to not work w/uninitialized data
// short_ints = (short *) malloc( 2048 );
// This is weak as `sizeof(short)!=2` is possible
short *short_ints = malloc(sizeof * short_ints * n/2);
charpair_short(short_ints, bytes1, n);
char *bytes2 = malloc(sizeof * bytes2 * n);
short_charpair(bytes2, short_ints, n);
compare(bytes1, bytes2, n); // TBD code for OP
// epilogue
free(bytes1);
free(short_ints);
free(bytes2);
return 0;
}
Avoided the union approach as that is platform endian dependent.

Here's a program that demonstrates that you are experiencing the problem associated with bit-shifting signed integral values.
#include <stdio.h>
#include <stdlib.h>
void testCore(char bytes1[],
char bytes2[],
short short_ints[],
int size)
{
int i = 0;
int j = 0;
for ( i=0; i<size; i+=2)
{
short_ints[j] = bytes1[i+1] << 8 | bytes1[i] ;
j++;
}
j = 0;
for ( i=0; i<size; i+=2)
{
bytes2[i+1] = (short_ints[j] >> 8) & 0xff;
bytes2[i] = (short_ints[j]) ;
j++;
}
for ( i=0; i<size; ++i)
{
if ( bytes1[i] != bytes2[i] )
{
printf("%d-th element is not equal\n", i);
}
}
}
void test1()
{
char bytes1[4] = {-10, 0, 0, 0};
char bytes2[4];
short short_ints[2];
testCore(bytes1, bytes2, short_ints, 4);
}
void test2()
{
char bytes1[4] = {10, 0, 0, 0};
char bytes2[4];
short short_ints[2];
testCore(bytes1, bytes2, short_ints, 4);
}
int main()
{
printf("Calling test1 ...\n");
test1();
printf("Done\n");
printf("Calling test2 ...\n");
test2();
printf("Done\n");
return 0;
}
Output of the program:
Calling test1 ...
1-th element is not equal
Done
Calling test2 ...
Done
Udate
Here's a version of testCore that works for me:
void testCore(char bytes1[],
char bytes2[],
short short_ints[],
int size)
{
int i = 0;
int j = 0;
unsigned char c1;
unsigned char c2;
unsigned short s;
for ( i=0; i<size; i+=2)
{
c1 = bytes1[i];
c2 = bytes1[i+1];
short_ints[j] = (c2 << 8) | c1;
j++;
}
j = 0;
for ( i=0; i<size; i+=2)
{
s = short_ints[j];
s = s >> 8;
bytes2[i+1] = s;
bytes2[i] = short_ints[j] & 0xff;
j++;
}
for ( i=0; i<size; ++i)
{
if ( bytes1[i] != bytes2[i] )
{
printf("%d-th element is not equal\n", i);
}
}
}
It is tested with:
char bytes1[4] = {-10, 0, 25, -4};
and
char bytes1[4] = {10, -2, 25, 4};

Well, what you need is a UNION:
#include <stdio.h>
#include <string.h>
union MyShort {
short short_value;
struct {
char byte1;
char byte2;
};
};
int main(int argc, const char * argv[])
{
char a[4]="abcd";
char b[4]="1234";
short c[5]; c[4]=0;
union MyShort d;
for (int i = 0; i<4; i++) {
d.byte1 = a[i];
d.byte2 = b[i];
c[i] = d.short_value;
}//next i
printf("%s\n", (char*)c);
return 0;
}
the result should be a1b2c3d4.

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

How to improve the performance of this code? - c

Related

How to vectorize and optimize this functions in C?

Intel Intrinsics code optimization

C: qsort doesn't seem to work with unsigned long

how to print memory bits in c

c; converting 2 bytes to short and vice versa

Categories

Resources