speeding up md5 program - c

This is a example of md5 in C, but the program is very slow it takes a little over a second to encode a simple string, What is slowing the program down?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
// Constants are the integer part of the sines of integers (in radians) * 2^32.
const uint32_t k[64] = {
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee ,
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 ,
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be ,
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 ,
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa ,
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 ,
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed ,
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a ,
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c ,
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 ,
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 ,
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 ,
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 ,
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 ,
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 ,
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 };
// r specifies the per-round shift amounts
const uint32_t r[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};
// leftrotate function definition
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
void to_bytes(uint32_t val, uint8_t *bytes)
{
bytes[0] = (uint8_t) val;
bytes[1] = (uint8_t) (val >> 8);
bytes[2] = (uint8_t) (val >> 16);
bytes[3] = (uint8_t) (val >> 24);
}
uint32_t to_int32(const uint8_t *bytes)
{
return (uint32_t) bytes[0]
| ((uint32_t) bytes[1] << 8)
| ((uint32_t) bytes[2] << 16)
| ((uint32_t) bytes[3] << 24);
}
void md5(const uint8_t *initial_msg, size_t initial_len, uint8_t *digest) {
// These vars will contain the hash
uint32_t h0, h1, h2, h3;
// Message (to prepare)
uint8_t *msg = NULL;
size_t new_len, offset;
uint32_t w[16];
uint32_t a, b, c, d, i, f, g, temp;
// Initialize variables - simple count in nibbles:
h0 = 0x67452301;
h1 = 0xefcdab89;
h2 = 0x98badcfe;
h3 = 0x10325476;
//Pre-processing:
//append "1" bit to message
//append "0" bits until message length in bits ≡ 448 (mod 512)
//append length mod (2^64) to message
for (new_len = initial_len + 1; new_len % (512/8) != 448/8; new_len++)
;
msg = (uint8_t*)malloc(new_len + 8);
memcpy(msg, initial_msg, initial_len);
msg[initial_len] = 0x80; // append the "1" bit; most significant bit is "first"
for (offset = initial_len + 1; offset < new_len; offset++)
msg[offset] = 0; // append "0" bits
// append the len in bits at the end of the buffer.
to_bytes(initial_len*8, msg + new_len);
// initial_len>>29 == initial_len*8>>32, but avoids overflow.
to_bytes(initial_len>>29, msg + new_len + 4);
// Process the message in successive 512-bit chunks:
//for each 512-bit chunk of message:
for(offset=0; offset<new_len; offset += (512/8)) {
// break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15
for (i = 0; i < 16; i++)
w[i] = to_int32(msg + offset + i*4);
// Initialize hash value for this chunk:
a = h0;
b = h1;
c = h2;
d = h3;
// Main loop:
for(i = 0; i<64; i++) {
if (i < 16) {
f = (b & c) | ((~b) & d);
g = i;
} else if (i < 32) {
f = (d & b) | ((~d) & c);
g = (5*i + 1) % 16;
} else if (i < 48) {
f = b ^ c ^ d;
g = (3*i + 5) % 16;
} else {
f = c ^ (b | (~d));
g = (7*i) % 16;
}
temp = d;
d = c;
c = b;
b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i]);
a = temp;
}
// Add this chunk's hash to result so far:
h0 += a;
h1 += b;
h2 += c;
h3 += d;
}
// cleanup
free(msg);
//var char digest[16] := h0 append h1 append h2 append h3 //(Output is in little-endian)
to_bytes(h0, digest);
to_bytes(h1, digest + 4);
to_bytes(h2, digest + 8);
to_bytes(h3, digest + 12);
}
int main(int argc, char **argv) {
char *msg = argv[1];
size_t len;
int i;
uint8_t result[16];
if (argc < 2) {
printf("usage: %s 'string'\n", argv[0]);
return 1;
}
len = strlen(msg);
// benchmark
for (i = 0; i < 1000000; i++) {
md5((uint8_t*)msg, len, result);
}
// display result
for (i = 0; i < 16; i++)
printf("%2.2x", result[i]);
puts("");
return 0;
}

I might be missing something, but:
// benchmark
for (i = 0; i < 1000000; i++) {
md5((uint8_t*)msg, len, result);
}
performs the same code one million times? A good idea to get an idea of how efficient it is, but not to actually do any work, efficiently.

Related

my MD5 implementation is giving the wrong result

Ive been trying to solve this issue for hours and hours on end im not completely sure whats making the wrong output obviously its hard to find out because its a hashing algorithm i know my chunking code is incomplete its still a work in progress and ive been programming this for a week
/*
* MD5 Implementation in C
* Created by Caelan Ireland 2023
*/
//#include "MD5.h"
#include <stdint.h> // uint32_t
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <stdio.h>
#include <endian.h>
#include <stdint.h> // needed?
// make sure to use -lm tag when compiling with gcc
#include <math.h>
// To Do: Essential Items for Understanding MD5
// 5. Divide up
// 6. Finish ABCD funct
// 4. Padding (Incomplete) incl error checks
// 4 32 bit words called A, B, C and D
#define A 0x67452301
#define B 0xEFCDAB89
#define C 0x98BADCFE
#define D 0x10325476
// remember Let [abcd k s i] ------------------------------------------->>>>>>>
const uint32_t S[64] = {
7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21
};
//const uint32_t MD[4] = {0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476};
// unsigned int T[64];
// for(int i = 0; i < 64; i++) {
// T[i] = floor(pow(2, 32) * fabs(sin(i + 1))); // fabs fixed wrap or abs? floorf for floats
// printf("0x%08x\n", T[i]); // %u
// };
// precomputed hashes
const uint32_t K[64] = { // T
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a,
0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, 0xf61e2562, 0xc040b340,
0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8,
0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 0x289b7ec6, 0xeaa127fa,
0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92,
0xffeff47d, 0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
};
#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~z)))
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
#define FF(a, b, c, d, k, s, i) \
{ (a) += F((b), (c), (d)) + (k) + (i); \
(a) = ROTATE_LEFT((a), (s)); \
(a) += (b); \
}
#define GG(a, b, c, d, k, s, i) \
{ (a) += G((b), (c), (d)) + (k) + (i); \
(a) = ROTATE_LEFT((a), (s)); \
(a) += (b); \
}
#define HH(a, b, c, d, k, s, i) \
{ (a) += H((b), (c), (d)) + (k) + (i); \
(a) = ROTATE_LEFT((a), (s)); \
(a) += (b); \
}
#define II(a, b, c, d, k, s, i) \
{ (a) += I((b), (c), (d)) + (k) + (i); \
(a) = ROTATE_LEFT((a), (s)); \
(a) += (b); \
}
// void ToLittleEndian(uint32_t* input) {
// uint32_t tmp = *input;
// *input = ((tmp & 0xff000000) >> 24) | ((tmp & 0x00ff0000) >> 8) |
// ((tmp & 0xff) << 24) | ((tmp & 0x0000ff00) << 8);
// }
uint8_t* ProcessChunk(uint8_t* input)
{
uint32_t AA = A;
uint32_t BB = B;
uint32_t CC = C;
uint32_t DD = D;
uint32_t M[16] = {0};
for(int i = 0; i < 16; i++) {
for(int j = 0; j < 4; j++) {
//word[i] |= (uint32_t)input[(i * 4) + j] << (24 - (j * 8));
M[i] |= (uint32_t)input[(i * 4) + j] << (32 - ((j + 1) * 8));
}
M[i] = htole32(M[i]); // is this necessary?
}
for(int i = 0; i < 16; i++) {
printf("\n word %d = 0x%08x\n", i, M[i]);
}
/* shifting rounds */
for(int i = 0; i < 64; i++) {
// error here
if (i < 16) {
FF(AA, BB, CC, DD, M[i], S[i], K[i]);
//printf("%d, %d, %u\n", M[i], S[i], K[i]);
} else if (i < 32) {
GG(AA, BB, CC, DD, M[(5 * i + 1) % 16], S[i], K[i]);
//printf("%d, %d, %u\n", M[(5 * i + 1) % 16], S[i], K[i]);
} else if (i < 48) {
HH(AA, BB, CC, DD, M[(3 * i + 5) % 16], S[i], K[i]);
//printf("%d, %d, %u\n", M[(3 * i + 5) % 16], S[i], K[i]);
} else {
II(AA, BB, CC, DD, M[(7 * i) % 16], S[i], K[i]);
//printf("%d, %d, %u\n", M[(7 * i) % 16], S[i], K[i]);
}
int Temp = DD;
DD = CC;
CC = BB;
BB = AA;
AA = Temp;
// printf("A = 0x%x\n", A);
// printf("B = 0x%x\n", B);
// printf("C = 0x%x\n", C);
// printf("D = 0x%x\n", D);
// printf("\n");
// printf("AA = 0x%x\n", AA);
// printf("BB = 0x%x\n", BB);
// printf("CC = 0x%x\n", CC);
// printf("DD = 0x%x\n", DD);
// printf("\n");
}
AA += A;
BB += B;
CC += C;
DD += D;
// uint64_t result = ((uint64_t)AA << 96) | ((uint64_t)BB << 64) |
// ((uint64_t)CC << 32) | (uint64_t)D;
uint8_t* digest = malloc(16);
for (int i = 0; i < 16; i++) {
if (i < 4) {
digest[i] = (AA >> (i*8)) & 0xff;
} else if (i < 8) {
digest[i] = (BB >> (i*8)) & 0xff;
} else if (i < 12) {
digest[i] = (CC >> (i*8)) & 0xff;
} else {
digest[i] = (DD >> (i*8)) & 0xff;
}
}
// for (int i = 0; i < 16; i++) {
// printf("%x ", result[i]);
// }
return (uint8_t*)digest;
// print out chunk
// printf("\n\n");
// for(int i = 0; i < 16; i++) {
// printf("0x%x ", M[i]);
// }
}
// abbrivation for print error
int printerr(char* errordesc)
{
printf("Error: %s\n", errordesc);
exit(1);
}
uint8_t* AddPadding(uint8_t* input, size_t input_len)
{
// compute padding length
size_t pad_len = (56 - (input_len) % 64) % 64;
// allocate required space to output buffer
uint8_t* padded = malloc(input_len + pad_len + 8);
if (padded == NULL) {
printerr("padding process cant allocate req memory");
}
// add cmd line string bytes into output
for(int i = 0; i < input_len; i++) { padded[i] = input[i]; }
// add padding bytes
for(int i = 0; i < (pad_len + 8); i++) {
if(i == 0) {
padded[input_len] = 0x80;
} else {
padded[input_len + 1 + i] = 0x00;
}
}
// changing size byte position to 0x80 when utilizing last byte
// add usigned 64 bit input length to end of the string
// if 2^64 only low order of bits are utilised
for(int i = 0; i < (64 / 8); i++) {
padded[input_len + pad_len + i] = (uint8_t)(input_len >> (64 - (i + 1) * 8) & 0xFF);
//padded[input_len + pad_len + i] = (uint8_t)(input_len >> (i * 8) & 0xFF);
}
return padded;
}
int main (int argc, char* argv[])
{
// initalise input var
uint8_t* inputstr = NULL;
if (argc !=3 || strcmp(argv[1], "--inputstr") != 0) {
printerr("usage - MD5 --inputstr (input_string)");
} else {
// point to argv[2] (3rd argument) for buffer
inputstr = (uint8_t*)argv[2];
}
// get str length
const size_t inputstr_len = strlen((char*)inputstr);
// improve later
if (inputstr_len == 0) {
printerr("inputstr < 1 char");
}
// add padding to last chunk
uint8_t* outputstr = AddPadding(inputstr, inputstr_len);
// print padding (probably will delete later)
for (int i = 0; i < inputstr_len + 1 + (64 - ((inputstr_len + 8 + 1) % 64 ) + 8); i++) {
printf("0x%02x ", outputstr[i]);
}
// break message into chunks + 1 compensation for padding
//size_t chunks = (inputstr_len / 64) + ((inputstr_len % 64 > 0) && (inputstr_len % 64 > 56));
uint8_t* chunk_result = NULL;
size_t chunks = (inputstr_len / 64) + 1 + (inputstr_len % 64 > 56);
for(int i = 0; i < chunks; i++) {
// only give req bytes
uint8_t* chunk_input = malloc(64 * sizeof(uint8_t));
for(int j = 0; j < 64; j++) {
chunk_input[j] = outputstr[i * 64 + j];
}
chunk_result = ProcessChunk(chunk_input);
// > 2 chunks??
}
// print chunk data
for (int i = 0; i < 16; i++) {
printf("%02x ", chunk_result[i]);
}
// make sure chunks cant be 0
printf("chunks: %ld\n", chunks);
// deallocate memory
free(outputstr);
free(chunk_result);
// executed sucessfully
exit(0);
}
so i was trying the MD5 hashing result for 'h' and it was
2510c39011c5be704182423e3a695e91
my result was
e0 64 f6 8c 9d de 68 df 1f 4b 11 d8 41 13 6b 77
i apologize i havent made a sorting method yet but by comparing the 0x25 you can tell its not equal
i have found a couple errors hopefully this helps on line 150 i defined them all as B in the macros #define statements i forgot the brackets around the letters

Taking screenshot using bmp file in c?

I'm trying to create a 3D Game using c.
when adding the flag '--save'
The game takes a screenshot and saves it.
so to do this (taking a screenshot from the game).
I have to use BMP file, so I have to make it using c.
The code:
#include "../includes/cub3d.h"
t_screenshot *ft_init_shot(t_mlx *mlx)
{
t_screenshot *takeshot;
takeshot = malloc(1 * sizeof(t_screenshot));
takeshot->width = w;
takeshot->height = h;
takeshot->bitcount = 24;
takeshot->width_in_bytes = ((takeshot->width * takeshot->bitcount + 31) / 32) * 4;
takeshot->imagesize = takeshot->width_in_bytes * takeshot->height;
takeshot->filesize = 54 + takeshot->imagesize;
return (takeshot);
}
unsigned char *ft_bitheader(t_mlx *mlx)
{
unsigned char *header;
uint32_t bisize;
uint32_t bfoffbits;
uint16_t biplanes;
header =(unsigned char *)malloc(54 * sizeof(char));
bisize = 40;
bfoffbits = 54;
biplanes = 1;
memcpy(header, "BM", 2);
memcpy(header + 2 , &mlx->shot->filesize, 4);
memcpy(header + 10, &bfoffbits, 4);
memcpy(header + 14, &bisize, 4);
memcpy(header + 18, &mlx->shot->width, 4);
memcpy(header + 22, &mlx->shot->height, 4);
memcpy(header + 26, &biplanes, 2);
memcpy(header + 28, &mlx->shot->bitcount, 2);
memcpy(header + 34, &mlx->shot->imagesize, 4);
return (header);
}
void screen_shot(t_mlx *mlx)
{
ft_move(mlx);
ft_update(mlx, YES);
mlx->shot = ft_init_shot(mlx);
mlx->shot->header = ft_bitheader(mlx);
screno(mlx);
}
void screno(t_mlx *mlx)
{
int x ;
int y ;
int row;
int col;
x = 0;
y = 0;
row = mlx->shot->height - 1;
col = 0;
unsigned char* buf = malloc(mlx->shot->imagesize);
while (row >= 0)
{
y = 0;
col = 0;
while (col < mlx->shot->width)
{
int red = (mlx->tex.img_data[x * w + y] >> 16) & 0xFF;
int green = (mlx->tex.img_data[x * w + y] >> 8) & 0xFF;
int blue = mlx->tex.img_data[x * w + y] & 0xFF;
buf[row * mlx->shot->width_in_bytes + col * 3 + 0] = blue;
buf[row * mlx->shot->width_in_bytes + col * 3 + 1] = green;
buf[row * mlx->shot->width_in_bytes + col * 3 + 2] = red;
col++;
y++;
}
row--;
x++;
}
ft_printf("Taking ScreenShoot....\n");
FILE *image = fopen("screenshot.bmp", "wb");
ft_printf("ScreenShot Has been saved under The name 'screenshot.bmp']\n");
fwrite(mlx->shot->header, 1, 54, image);
fwrite((char*)buf, 1, mlx->shot->imagesize, image);
fclose(image);
free(buf);
}
The problem is when I compiling This code the image appear some time and some time give me this error:
Thank You for Your help.

256-bit integer to string [duplicate]

I'm trying to convert a 128-bit unsigned integer stored as an array of 4 unsigned ints to the decimal string representation in C:
unsigned int src[] = { 0x12345678, 0x90abcdef, 0xfedcba90, 0x8765421 };
printf("%s", some_func(src)); // gives "53072739890371098123344"
(The input and output examples above are completely fictional; I have no idea what that input would produce.)
If I was going to hex, binary or octal, this would be a simple matter of masks and bit shifts to peel of the least significant characters. However, it seems to me that I need to do base-10 division. Unfortunately, I can't remember how to do that across multiple ints, and the system I'm using doesn't support data types larger than 32-bits, so using a 128-bit type is not possible. Using a different language is also out, and I'd rather avoid a big number library just for this one operation.
Division is not necessary:
#include <string.h>
#include <stdio.h>
typedef unsigned long uint32;
/* N[0] - contains least significant bits, N[3] - most significant */
char* Bin128ToDec(const uint32 N[4])
{
// log10(x) = log2(x) / log2(10) ~= log2(x) / 3.322
static char s[128 / 3 + 1 + 1];
uint32 n[4];
char* p = s;
int i;
memset(s, '0', sizeof(s) - 1);
s[sizeof(s) - 1] = '\0';
memcpy(n, N, sizeof(n));
for (i = 0; i < 128; i++)
{
int j, carry;
carry = (n[3] >= 0x80000000);
// Shift n[] left, doubling it
n[3] = ((n[3] << 1) & 0xFFFFFFFF) + (n[2] >= 0x80000000);
n[2] = ((n[2] << 1) & 0xFFFFFFFF) + (n[1] >= 0x80000000);
n[1] = ((n[1] << 1) & 0xFFFFFFFF) + (n[0] >= 0x80000000);
n[0] = ((n[0] << 1) & 0xFFFFFFFF);
// Add s[] to itself in decimal, doubling it
for (j = sizeof(s) - 2; j >= 0; j--)
{
s[j] += s[j] - '0' + carry;
carry = (s[j] > '9');
if (carry)
{
s[j] -= 10;
}
}
}
while ((p[0] == '0') && (p < &s[sizeof(s) - 2]))
{
p++;
}
return p;
}
int main(void)
{
static const uint32 testData[][4] =
{
{ 0, 0, 0, 0 },
{ 1048576, 0, 0, 0 },
{ 0xFFFFFFFF, 0, 0, 0 },
{ 0, 1, 0, 0 },
{ 0x12345678, 0x90abcdef, 0xfedcba90, 0x8765421 }
};
printf("%s\n", Bin128ToDec(testData[0]));
printf("%s\n", Bin128ToDec(testData[1]));
printf("%s\n", Bin128ToDec(testData[2]));
printf("%s\n", Bin128ToDec(testData[3]));
printf("%s\n", Bin128ToDec(testData[4]));
return 0;
}
Output:
0
1048576
4294967295
4294967296
11248221411398543556294285637029484152
Straightforward division base 2^32, prints decimal digits in reverse order, uses 64-bit arithmetic, complexity O(n) where n is the number of decimal digits in the representation:
#include <stdio.h>
unsigned int a [] = { 0x12345678, 0x12345678, 0x12345678, 0x12345678 };
/* 24197857161011715162171839636988778104 */
int
main ()
{
unsigned long long d, r;
do
{
r = a [0];
d = r / 10;
r = ((r - d * 10) << 32) + a [1];
a [0] = d;
d = r / 10;
r = ((r - d * 10) << 32) + a [2];
a [1] = d;
d = r / 10;
r = ((r - d * 10) << 32) + a [3];
a [2] = d;
d = r / 10;
r = r - d * 10;
a [3] = d;
printf ("%d\n", (unsigned int) r);
}
while (a[0] || a[1] || a[2] || a[3]);
return 0;
}
EDIT: Corrected the loop so it displays a 0 if the array a contains only zeros.
Also, the array is read left to right, a[0] is most-significant, a[3] is least significant digits.
A slow but simple approach is to just printing digits from most significant to least significant using subtraction. Basically you need a function for checking if x >= y and another for computing x -= y when that is the case.
Then you can start counting how many times you can subtract 10^38 (and this will be most significant digit), then how many times you can subtract 10^37 ... down to how many times you can subtract 1.
The following is a full implementation of this approach:
#include <stdio.h>
typedef unsigned ui128[4];
int ge128(ui128 a, ui128 b)
{
int i = 3;
while (i >= 0 && a[i] == b[i])
--i;
return i < 0 ? 1 : a[i] >= b[i];
}
void sub128(ui128 a, ui128 b)
{
int i = 0;
int borrow = 0;
while (i < 4)
{
int next_borrow = (borrow && a[i] <= b[i]) || (!borrow && a[i] < b[i]);
a[i] -= b[i] + borrow;
borrow = next_borrow;
i += 1;
}
}
ui128 deci128[] = {{1u,0u,0u,0u},
{10u,0u,0u,0u},
{100u,0u,0u,0u},
{1000u,0u,0u,0u},
{10000u,0u,0u,0u},
{100000u,0u,0u,0u},
{1000000u,0u,0u,0u},
{10000000u,0u,0u,0u},
{100000000u,0u,0u,0u},
{1000000000u,0u,0u,0u},
{1410065408u,2u,0u,0u},
{1215752192u,23u,0u,0u},
{3567587328u,232u,0u,0u},
{1316134912u,2328u,0u,0u},
{276447232u,23283u,0u,0u},
{2764472320u,232830u,0u,0u},
{1874919424u,2328306u,0u,0u},
{1569325056u,23283064u,0u,0u},
{2808348672u,232830643u,0u,0u},
{2313682944u,2328306436u,0u,0u},
{1661992960u,1808227885u,5u,0u},
{3735027712u,902409669u,54u,0u},
{2990538752u,434162106u,542u,0u},
{4135583744u,46653770u,5421u,0u},
{2701131776u,466537709u,54210u,0u},
{1241513984u,370409800u,542101u,0u},
{3825205248u,3704098002u,5421010u,0u},
{3892314112u,2681241660u,54210108u,0u},
{268435456u,1042612833u,542101086u,0u},
{2684354560u,1836193738u,1126043566u,1u},
{1073741824u,1182068202u,2670501072u,12u},
{2147483648u,3230747430u,935206946u,126u},
{0u,2242703233u,762134875u,1262u},
{0u,952195850u,3326381459u,12621u},
{0u,932023908u,3199043520u,126217u},
{0u,730304488u,1925664130u,1262177u},
{0u,3008077584u,2076772117u,12621774u},
{0u,16004768u,3587851993u,126217744u},
{0u,160047680u,1518781562u,1262177448u}};
void print128(ui128 x)
{
int i = 38;
int z = 0;
while (i >= 0)
{
int c = 0;
while (ge128(x, deci128[i]))
{
c++; sub128(x, deci128[i]);
}
if (i==0 || z || c > 0)
{
z = 1; putchar('0' + c);
}
--i;
}
}
int main(int argc, const char *argv[])
{
ui128 test = { 0x12345678, 0x90abcdef, 0xfedcba90, 0x8765421 };
print128(test);
return 0;
}
That number in the problem text in decimal becomes
11248221411398543556294285637029484152
and Python agrees this is the correct value (this of course doesn't mean the code is correct!!! ;-) )
Same thing, but with 32-bit integer arithmetic:
#include <stdio.h>
unsigned short a [] = {
0x0876, 0x5421,
0xfedc, 0xba90,
0x90ab, 0xcdef,
0x1234, 0x5678
};
int
main ()
{
unsigned int d, r;
do
{
r = a [0];
d = r / 10;
r = ((r - d * 10) << 16) + a [1];
a [0] = d;
d = r / 10;
r = ((r - d * 10) << 16) + a [2];
a [1] = d;
d = r / 10;
r = ((r - d * 10) << 16) + a [3];
a [2] = d;
d = r / 10;
r = ((r - d * 10) << 16) + a [4];
a [3] = d;
d = r / 10;
r = ((r - d * 10) << 16) + a [5];
a [4] = d;
d = r / 10;
r = ((r - d * 10) << 16) + a [6];
a [5] = d;
d = r / 10;
r = ((r - d * 10) << 16) + a [7];
a [6] = d;
d = r / 10;
r = r - d * 10;
a [7] = d;
printf ("%d\n", r);
}
while (a[0] || a[1] || a[2] || a[3] || a [4] || a [5] || a[6] || a[7]);
return 0;
}
You actually don't need to implement long division. You need to implement multiplication by a power of two, and addition. You have four uint_32. First convert each of them to a string. Multiply them by (2^32)^3, (2^32)^2, (2^32)^1, and (2^32)^0 respectively, then add them together. You don't need to do the base conversion, you just need to handle putting the four pieces together. You'll obviously need to make sure the strings can handle a number up to UINT_32_MAX*(2^32)^3.
Supposing you have a fast 32-bit multiplication and division the result can be computed 4 digits at a time by implementing a bigint division/modulo 10000 and then using (s)printf for output of digit groups.
This approach is also trivial to extend to higher (or even variable) precision...
#include <stdio.h>
typedef unsigned long bigint[4];
void print_bigint(bigint src)
{
unsigned long int x[8]; // expanded version (16 bit per element)
int result[12]; // 4 digits per element
int done = 0; // did we finish?
int i = 0; // digit group counter
/* expand to 16-bit per element */
x[0] = src[0] & 65535;
x[1] = src[0] >> 16;
x[2] = src[1] & 65535;
x[3] = src[1] >> 16;
x[4] = src[2] & 65535;
x[5] = src[2] >> 16;
x[6] = src[3] & 65535;
x[7] = src[3] >> 16;
while (!done)
{
done = 1;
{
unsigned long carry = 0;
int j;
for (j=7; j>=0; j--)
{
unsigned long d = (carry << 16) + x[j];
x[j] = d / 10000;
carry = d - x[j] * 10000;
if (x[j]) done = 0;
}
result[i++] = carry;
}
}
printf ("%i", result[--i]);
while (i > 0)
{
printf("%04i", result[--i]);
}
}
int main(int argc, const char *argv[])
{
bigint tests[] = { { 0, 0, 0, 0 },
{ 0xFFFFFFFFUL, 0, 0, 0 },
{ 0, 1, 0, 0 },
{ 0x12345678UL, 0x90abcdefUL, 0xfedcba90UL, 0x8765421UL } };
{
int i;
for (i=0; i<4; i++)
{
print_bigint(tests[i]);
printf("\n");
}
}
return 0;
}
#Alexey Frunze's method is easy but it's very slow. You should use #chill's 32-bit integer method above. Another easy method without any multiplication or division is double dabble. This may work slower than chill's algorithm but much faster than Alexey's one. After running you'll have a packed BCD of the decimal number
On github is an open source project (c++) which provides a class for a datatype uint265_t and uint128_t.
https://github.com/calccrypto/uint256_t
No, I' not affiliated with that project, but I was using it for such a purpose, but I guess it could be usefull for others as well.

How can I encode four 16 bit uints into a 64 bit uint, and decode them again?

I wrote this function with the help of this page on bit twiddling:
uint16_t *decode(uint64_t instr) {
// decode instr (this is new to me lol)
uint16_t icode = (instr >> 48) & ((1 << 16) - 1);
uint16_t p1 = (instr >> 32) & ((1 << 16) - 1);
uint16_t p2 = (instr >> 16) & ((1 << 16) - 1);
uint16_t p3 = (instr >> 00) & ((1 << 16) - 1);
return (uint16_t[]){icode, p1, p2, p3};
}
I have this to test it:
uint16_t *arr = decode(number);
for(int i = 0; i < 4; i++) {
printf("%d\n", arr[i]);
}
However, this prints 0 four times whatever number is. I also haven't solved the first part of the question, how to encode the four uint16_t's in the first place.
how to encode the four uint16_t's in the first place
This isn't hard. All you have to do is to load each uint16_t to a uint64_t one-by-one, and then return that uint64_t:
uint64_t encode(uint16_t uints[]) {
uint64_t master = 0;
for (uint8_t index = 0; index <= 3; ++index) {
master <<= 16; // Shift master left by 16 bits to create space for the next uint16
master |= uints[index]; // Load uints[index] to the lower 16 bits of master
} // Do this four times
return master;
}
To load the uint16_ts in reverse order, simply replace uint8_t index = 0; index <= 3; ++index with uint8_t index = 3; index >= 0; --index.
Your best bet is actually to use memcpy. Most modern compilers will optimize this into the necessary bit shifts and such for you.
uint64_t pack(const uint16_t arr[static 4]) {
uint64_t res;
memcpy(&res, arr, 8);
return res;
}
void unpack(uint64_t v, uint16_t arr[static 4]) {
memcpy(arr, &v, 8);
}
Note that the result is endian-dependent, appropriate for packing and unpacking on the same machine. Note too that I'm using the static array specifier to check that the caller passes at least 4 elements (when such checking is possible); if that gives your compiler grief, just remove the static specifier.
First, you can't pass an array back from a function the way you currently have it (which is why you're getting 0's), you'll need to pass it via pointer or static reference.
However, since you're dealing with 2 known bit-widths, you can use a mask and shift off that:
out[0] = val & 0x000000000000FFFF; // 1st word
out[1] = (val & 0x00000000FFFF0000) >> 16; // 2nd word
out[2] = (val & 0x0000FFFF00000000) >> 32; // 3rd word
out[3] = (val & 0xFFFF000000000000) >> 48; // 4th word
You could put this in a function or macro:
#define MACRO_DECODE(val, arr) arr[0]= val & 0x000000000000FFFF; \
arr[1] = (val & 0x00000000FFFF0000) >> 16; \
arr[2] = (val & 0x0000FFFF00000000) >> 32; \
arr[3] = (val & 0xFFFF000000000000) >> 48;
void decode(uint64_t val, uint16_t *out)
{
out[0] = val & 0x000000000000FFFF;
out[1] = (val & 0x00000000FFFF0000) >> 16;
out[2] = (val & 0x0000FFFF00000000) >> 32;
out[3] = (val & 0xFFFF000000000000) >> 48;
}
int main(int argc, char** argv)
{
int i;
uint16_t arr[] = { 0, 0, 0, 0} ;
for (i = 0; i < 4; ++i) {
printf("%#06x = %d\n", arr[i], arr[i]);
}
// as a function
decode(0xAAAABBBBCCCCDDDD, arr);
for (i = 0; i < 4; ++i) {
printf("%#06x = %d\n", arr[i], arr[i]);
}
// as a macro
MACRO_DECODE(0xDDDDCCCCBBBBAAAA, arr);
for (i = 0; i < 4; ++i) {
printf("%#06x = %d\n", arr[i], arr[i]);
}
return 0;
}
Additionally, you could use memcpy:
int main(int argc, char** argv)
{
int i;
uint16_t arr[] = { 0, 0, 0, 0} ;
uint64_t src = 0xAAAABBBBCCCCDDDD;
for (i = 0; i < 4; ++i) {
printf("%#06x = %d\n", arr[i], arr[i]);
}
// memcpy
memcpy(arr, &src, sizeof(arr));
for (i = 0; i < 4; ++i) {
printf("%#06x = %d\n", arr[i], arr[i]);
}
return 0;
}
Hope that can help.

Finding t[] array in Sakamoto's algorithm to find the day of week

I wanna know how to find how to calculate the values of t[] array in this algorithm?
int dow(int y, int m, int d)
{
static int t[] = {0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4};
y -= m < 3;
return (y + y/4 - y/100 + y/400 + t[m-1] + d) % 7;
}
You can use code below to generate the required array
#include <stdio.h>
int main(void) {
int arr[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
int totalDaysInThisYear = -1, m; /* Parity would make totalDaysInThisYear = 0 */
for(m = 1; m <= 12; ++m) {
const int parity = (m < 3);
printf("%d\n", (totalDaysInThisYear + parity) % 7);
totalDaysInThisYear+= arr[m - 1];
}
return 0;
}

Resources