How to use MPI_Scatterv and displacement in C - c

#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
int size;
typedef struct process_struct
{
int start;
int end;
} process_tag;
int main(int argc, char **argv)
{ //array size
size=11;
int rc, myrank, world_size;
rc = MPI_Init(&argc, &argv);
if (rc != MPI_SUCCESS) {
printf ("Error starting MPI program\n");
MPI_Abort(MPI_COMM_WORLD, rc);
}
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
int i, j;
int process_num=4;
int processs_i;
int start, end;
double **a = malloc(size*sizeof(double*));
if (a == NULL) { printf("malloc failed\n"); return 1; }
for (i = 0; i < size; i++) {
a[i] = malloc(size*sizeof(double));
if (a[i] == NULL) { printf("malloc failed\n"); return 1; }
}
//initialize array "a"
a=ini(a);
if(myrank == 0 ) {
printarray(a,size);
}
// store starting row and ending row for each process
process_tag process[process_num];
int chunk_size = (size - 2 ) / process_num ;
int remain = (size - 2) - chunk_size * process_num ;
start = 1;
end = 1;
int process_i =0;
while(start < size) {
start = end;
end = start + chunk_size;
if (remain > 0) {
end++;
remain--;
}
process[process_i].start = start;
process[process_i].end = end;
process_i++;
}
int send_count[process_num];
for (process_i = 0; process_i < process_num; process_i++) {
send_count[process_i] = process[process_i].end - process[process_i].start;
}
int receive_count = send_count[myrank];
int *displs = malloc(sizeof(int)*size);
int sum =1;
for (i = 0; i < process_num; i++) {
displs[i] = sum;
sum += send_count[i];
}
double **blocal;
blocal = malloc(receive_count*sizeof(double*));
if (blocal == NULL)
{ printf("malloc failed\n"); return 1; }
for (i = 0; i < size; i++) {
blocal[i] = malloc(size*sizeof(double));
if (blocal[i] == NULL)
{ printf("malloc failed\n"); return 1; }
}
MPI_Scatterv(a, send_count, displs, MPI_DOUBLE, blocal,
receive_count, MPI_DOUBLE, 0, MPI_COMM_WORLD);
printarray(blocal,receive_count);
printf("from rank %d \n", myrank);
}
output is:
array a is
1.000000 3.700000 2.000000 6.000000 5.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
1.000000 8.200000 3.000000 7.000000 3.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
1.000000 6.000000 9.000000 1.000000 6.300000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
1.000000 5.000000 1.000000 3.000000 4.300000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
1.000000 4.500000 6.000000 4.000000 7.600000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
1.000000 1.000000 1.000000 1.000000 1.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000
1.000000 1.000000 1.000000 1.000000 1.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000
1.000000 1.000000 1.000000 1.000000 1.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000
1.000000 1.000000 1.000000 1.000000 1.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000
1.000000 1.000000 1.000000 1.000000 1.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000
1.000000 1.000000 1.000000 1.000000 1.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000
blocal from rank 0
1.000000 8.200000 3.000000 7.000000 3.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
1.000000 6.000000 9.000000 1.000000 6.300000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
1.000000 5.000000 1.000000 3.000000 4.300000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
blocal from rank 1
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
blocal from rank 2
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
blocal from rank 3
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
I want to scatter the elements of 11*11 sendbuf array "a" to arrays "blocal" (receivebuf).
What I want to achieve is that blocal at process 0 get row 1 to 3, process 1 get row 4 to 5, process 2 get row 6 to 7, and process 3 get row 8 to 9. Row 0 and row 10 are ignored.
However in my output only process 0 got the sub-array from array "a".
Could anybody spot any error in my code? I think the problem might be the displacement.

Related

how to pass a pointer to a function and create a matrix there with the pointer as the starting address?

Same matrix should be printed but here outside the function its not printing any value of the matrix. What is the issue here?(I dont want the argument name in function and name of variable passed to be same.)
0.000000 0.000000 0.000000 0.000000 0.000000
1.000000 1.000000 1.000000 1.000000 1.000000
2.000000 2.000000 2.000000 2.000000 2.000000
3.000000 3.000000 3.000000 3.000000 3.000000
0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000
#include<stdio.h>
#include<stdlib.h>
void tryn(double *a)
{
int i,j;
a=(double *)calloc(20,sizeof(double));
for(i=0;i<4;i++)
{
for(j=0;j<5;j++)
{
*(a+i*5+j)=i;
}
}
for(i=0;i<4;i++)
{
for(j=0;j<5;j++)
{
printf("%lf ",*(a+i*5+j));
}
printf("\n");
}
}
int main()
{
int i,j;
double *arr;
tryn(arr);
for(i=0;i<4;i++)
{
for(j=0;j<5;j++)
{
printf("%lf ",(arr+i*5+j));
}
printf("\n");
}
free(arr);
}
the output its giving
Parameters to functions in C are pass by value. That means that changes to a in tryn are not reflected in the calling function, so arr in main remains uninitialized.
You need to pass the address of arr to your function:
tryn(&arr);
And change the parameter type in the function accordingly:
void tryn(double **arr)
{
double *a=calloc(20,sizeof(double));
...
*arr = a;
}

Lagrange Interpolation Algorithm in C Diverges After Many Steps

I have a Lagrange interpolation algorithm that begins to diverge after many time steps and I can't seem to figure out why. As a quick review, if I had two arrays
int x[11] = {0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}
int y[11] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20}
and I input an x-value of 15 into the algorithm, the output (i.e. interpolated y-value) should be 3. The following algorithm gets the interpolated value correct, but as I cycle through incremented inputs eventually the outputs begin to diverge. I am not sure what is causing the divergence. The code creates two arrays of integers going from -100 to +100 and interpolates the values based on an incremented x-input. The values begin matching as they should, but around 55 or so the interpolated y-value begins to diverge. The code is below. Any insight would be greatly appreciated.
#include <stdio.h>
#define SIZE 201
int main()
{
double x[SIZE], y[SIZE], value, sum, factor[SIZE];
for (int i = 0; i < SIZE; i++)
{
x[i] = -100 + i;
}
for (int i = 0; i < SIZE; i++)
{
y[i] = -100 + i;
}
value = 0.0;
while (1)
{
sum = 0.0;
printf("Input is: %lf\n", value);
for(int i = 0; i < SIZE; i++)
{
factor[i] = 1.0;
for(int j = 0; j < SIZE; j++)
{
if(i != j)
{
factor[i] = factor[i] * (value - x[j])/(x[i] - x[j]);
}
}
sum = sum + factor[i] * y[i];
}
printf("Output is: %lf\n", sum);
// if ((value - sum) > 0.01) break;
if (value < 100) value += 0.001;
else break;
}
return 0;
}
Given N samples, the Lagrange polynomial is of the degree of N, in your case, 200. It is a pretty large degree, and for a value large enough the intermediate results (i.e. factor) starts behaving quite erratically. I printed factor after each iteration of the outer loop, and this is what I have on my machine (where the code diverges at value 62.1):
Input is: 62.100000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000002
-0.000010
0.000047
-0.000212
0.000916
-0.003834
0.015558
-0.061214
0.233667
-0.865800
3.115490
-10.892598
37.019453
-122.351630
393.413839
-1231.176112
3751.320027
-11132.603776
32188.920849
-90709.929863
249216.884601
-667734.556134
1745251.075381
-4451006.398268
11079451.201015
-26924437.939740
63892139.532124
-148088119.614110
335320733.480250
-741923910.135582
1604370277.576735
-3391407192.476863
7009154161.161494
-14165714298.372702
28000907070.092331
-54142322716.578796
102423636122.796417
-189594810732.400879
343460854643.929565
-608991796878.604858
1057024952593.679688
-1796190002106.606201
2988571833231.810547
-4869319260810.958008
7769844431032.450195
-12143392562153.164062
18590594785582.515625
-27881222667878.292969
40966915113033.500000
-58978430272092.585938
83200365623915.609375
-115016713573880.578125
155822462931701.031250
-206899948714767.906250
269263745978734.968750
-343484172924072.000000
429506076055356.812500
-526485323131795.875000
632668952077638.500000
-745344926690223.250000
860883054405210.375000
-974879663742953.625000
1082405867199472.750000
-1178344322529343.250000
1257784740974879.500000
-1316436663535827.500000
1351011674779421.000000
-1359527880018181.000000
1341497535715305.750000
-1297973187760748.750000
1231446261994579.000000
-1145611653890577.250000
1045029162299372.250000
-934724762872858.125000
819779917571950.250000
-704954924193421.250000
594383648437451.875000
-491363855983359.125000
398252366806871.062500
-316460003024013.937500
246529925760965.156250
-188275766805651.375000
140953330916004.437500
-103441104978326.546875
74409294797142.390625
-52463275136077.218750
36253867741005.359375
-24552698830046.890625
16295359517452.095703
-10597930155882.712891
6753701701870.307617
-4216931244837.877441
2579609371842.192871
-1545902253614.920410
907502140862.699341
-521815325006.697205
293869684950.453308
-162078786010.080200
87537719445.039368
-46294138480.248749
23970808358.990040
-12151499571.568396
6030219319.167710
-2929269302.567177
1392763710.430415
-648125926.103312
295175902.544337
-131559694.388017
57381635.324659
-24492187.039684
10230449.673919
-4182126.956840
1673313.368037
-655394.261464
251347.530884
-94414.677824
34753.964878
-12544.686326
4444.407825
-1547.546444
530.612164
-179.651152
60.317931
-20.218974
6.844738
-2.391284
0.904560
-0.429040
1.136162
0.029430
-0.003145
0.000450
-0.000070
0.000011
-0.000002
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
0.000000
-0.000000
Output is: 67.164298
Sorry for the volume of paste.
You can see that the code operates with very large factors (e.g. 155822462931701.031250) while sum stays around 1. This leads to loss of precision due to normalization. And the loss of precision amplifies as value grows.
Bottom line is, the naive Lagrange interpolation is numerically unstable. Check out Notes.

FIFO in While loop stucks and waits for read

Server sends its PID to client, client gets and sends SIGUSR1 signal in a loop, so i have handler for that in server code. In theory it needs to get signal and create some random matrix to client back.Client receives Server PID but it never sends matrix to client for some reason. It waits and insert blinking forever.I assume somehow they dont connect to each other but why? I tried to sending a matrix and it did just fine.
client.c
int main()
{
int n=2;
pid_t pid=1;
int i,j;
double matrix[20][20];
char *myfifo2 = "/tmp/kek_fifo";
int server_to_client;
char *myfifo3 = "/tmp/fifor_fifo";
int fifor;
int forke=1;
char str[BUFSIZ];
fifor = open(myfifo3, O_RDONLY);
read(fifor,&pid,sizeof(pid));
printf("pid %d\n",pid);
close(fifor);
unlink(myfifo3);
while(1){
printf("no\n");
server_to_client = open(myfifo2, O_RDONLY);
read(server_to_client,matrix,sizeof(matrix));
close(server_to_client);
printf("yes\n");
forke=fork();
if(forke){
printf("forke buyuktur >= parent olmali %d \n",forke);
for(i = 0; i < 2*n; i++){
for(j = 0; j < 2*n; j++){
printf("%2f ",matrix[i][j]);
}
printf("\n");
}
wait(0); //parent child bekliyo
}
else{
printf("forke =0 yani child %d \n",forke);
exit(0);
}
//sleep(5);
kill(pid,SIGUSR1);
}
return 0;
}
server.c
void actionHandler(int signum);
void matrix_init(double m[20][20],int n);
void randome(double myPointer[20][20],int n);
static int signalcheck=0;
int main()
{
int i,j;
int n=2; //argv olacak
pid_t pide=getpid();
printf("pide is %d \n",pide);
struct sigaction action;
action.sa_handler=actionHandler;
action.sa_flags = 0;
int client_to_server;
char *myfifo = "/tmp/client_to_server_fifo";
int forke;
int server_to_client;
char *myfifo2 = "/tmp/kek_fifo";
char *myfifo3 = "/tmp/fifor_fifo";
int fifor;
char buf[BUFSIZ];
double M[20][20];
matrix_init(M,n);
/* create the FIFO (named pipe) */
// mkfifo(myfifo, 0666);
mkfifo(myfifo2, 0666);
mkfifo(myfifo3,0666);
fifor =open(myfifo3,O_WRONLY);
write(fifor,&pide,sizeof(pide));
close(fifor);
unlink(myfifo3);
if ((sigemptyset(&action.sa_mask) == -1) ||
(sigaction(SIGUSR1, &action, NULL) == -1))
perror("Failed to install SIGURS1 signal handler");
/* open, read, and display the message from the FIFO */
// client_to_server = open(myfifo, O_RDONLY);
printf("Server ON.\n");
while (1)
{
if(signalcheck==1){
forke=fork();
if(forke==0){
while(1){
randome(M,n);
server_to_client = open(myfifo2, O_WRONLY);
write(server_to_client,M,sizeof(M));
close(server_to_client);
unlink(myfifo2);
printf("matrix yarat\n");
//sleep(5);
//create matrix send to fifo
}
}
/*
read(client_to_server, buf, BUFSIZ);
if (strcmp("exit",buf)==0)
{
printf("Server OFF.\n");
break;
}
else if (strcmp("",buf)!=0)
{
printf("Received: %s\n", buf);
printf("Sending back...\n");
write(server_to_client,buf,BUFSIZ);
}
clean buf from any data
memset(buf, 0, sizeof(buf)); */
//sleep(5);
}
signalcheck=0;
}
// close(client_to_server);
// close(server_to_client);
// unlink(myfifo);
// unlink(myfifo2);
return 0;
}
void actionHandler(int signum)
{
signalcheck=1;
if(signum==SIGUSR1)
printf("i catched signal code from client i guess \n");
/* Signal handler code goes here. */
}
void matrix_init(double m[20][20],int z){
/*
this function purpose to create
2Nx2N int matrix n being int z
then initing this matrix to 0 before random numbers take in part
it has error checks .
blah!*/
int i, j,n=z;
double **k = (double **) malloc(n * sizeof(double*));
if(m == NULL){
printf("Error full matrix not created");
exit(-1);
}
for(i = 0; i < n; i++){
for(j = 0; j < n; j++){
m[i][j] = 0;
}
}
}
void randome(double myPointer[20][20],int n){
int i,j;
int r;
srand ( time(NULL) );
for(i=0;i<n*2;i++)
for(j=0;j<n*2;j++)
myPointer[i][j]=rand()%10;
}
I think one reason your question got ignored for so long is that the code is that there's quite a lot of code and a fair amount of comments that are irrelevant bits of code, and the layout isn't all tidy. These add up to "it is hard work to read the code", which puts people off actually reading it.
This code seems to work OK on a MacBook Pro running macOS Sierra 10.12.4 using GCC 6.3.0 to compile.
Client code
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
int main(void)
{
int n = 2;
pid_t pid = 1;
int i, j;
double matrix[20][20];
char *myfifo2 = "/tmp/kek_fifo";
int server_to_client;
char *myfifo3 = "/tmp/fifor_fifo";
int fifor;
int forke = 1;
fifor = open(myfifo3, O_RDONLY);
read(fifor, &pid, sizeof(pid));
printf("pid %d\n", pid);
close(fifor);
unlink(myfifo3);
for (int loopnum = 0; loopnum < 20; loopnum++)
{
printf("begin loop %d\n", loopnum);
kill(pid, SIGUSR1);
printf("signal sent\n");
server_to_client = open(myfifo2, O_RDONLY);
printf("Open done\n");
read(server_to_client, matrix, sizeof(matrix));
printf("read done\n");
close(server_to_client);
printf("close done\n");
forke = fork();
if (forke)
{
printf("forke buyuktur >= parent olmali %d\n", forke);
for (i = 0; i < 2 * n; i++)
{
for (j = 0; j < 2 * n; j++)
{
printf("%2f ", matrix[i][j]);
}
printf("\n");
}
wait(0); // parent child bekliyo
}
else
{
printf("forke = 0 yani child %d\n", forke);
exit(loopnum);
}
}
return 0;
}
Server code
#include <assert.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
void actionHandler(int signum);
void matrix_init(double m[20][20], int n);
void randome(double myPointer[20][20], int n);
static volatile sig_atomic_t signalcheck = 0;
int main(void)
{
int n = 2; // argv olacak
pid_t pide = getpid();
printf("pide is %d\n", pide);
struct sigaction action;
action.sa_handler = actionHandler;
action.sa_flags = 0;
int forke;
int server_to_client;
char *myfifo2 = "/tmp/kek_fifo";
char *myfifo3 = "/tmp/fifor_fifo";
int fifor;
double M[20][20];
matrix_init(M, n);
srand(time(NULL));
if ((sigemptyset(&action.sa_mask) == -1) ||
(sigaction(SIGUSR1, &action, NULL) == -1))
{
perror("Failed to install SIGURS1 signal handler");
return 1;
}
/* create the FIFO (named pipe) */
mkfifo(myfifo2, 0666);
mkfifo(myfifo3, 0666);
fifor = open(myfifo3, O_WRONLY);
write(fifor, &pide, sizeof(pide));
close(fifor);
unlink(myfifo3);
printf("Server ON (%d).\n", signalcheck);
while (1)
{
if (signalcheck == 1)
{
printf("Signal received:\n");
signalcheck = 0;
randome(M, n);
forke = fork();
if (forke == 0)
{
server_to_client = open(myfifo2, O_WRONLY);
write(server_to_client, M, sizeof(M));
close(server_to_client);
printf("matrix yarat %d\n", (int)getpid());
exit(0);
}
else
{
int corpse;
int status;
while ((corpse = wait(&status)) != -1)
printf("PID %d exited with status 0x%.4x\n", corpse, status);
}
}
}
return 0;
}
void actionHandler(int signum)
{
static char msg[] = "I caught a signal from the client\n";
signalcheck = 1;
if (signum == SIGUSR1)
{
int nb = write(1, msg, sizeof(msg)-1);
assert(nb == sizeof(msg)-1);
}
}
void matrix_init(double m[20][20], int z)
{
for (int i = 0; i < z; i++)
{
for (int j = 0; j < z; j++)
m[i][j] = 0;
}
}
void randome(double myPointer[20][20], int n)
{
int i, j;
for (i = 0; i < n * 2; i++)
{
for (j = 0; j < n * 2; j++)
myPointer[i][j] = rand() % 10;
}
}
Server output
pide is 474
Server ON (0).
I caught a signal from the client
Signal received:
matrix yarat 476
PID 476 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 478
PID 478 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 481
PID 481 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 483
PID 483 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 485
PID 485 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 488
PID 488 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 491
PID 491 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 493
PID 493 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 496
PID 496 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 499
PID 499 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 501
PID 501 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 503
PID 503 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 506
PID 506 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 512
PID 512 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 514
PID 514 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 516
PID 516 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 518
PID 518 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 521
PID 521 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 523
PID 523 exited with status 0x0000
I caught a signal from the client
Signal received:
matrix yarat 525
PID 525 exited with status 0x0000
^C
The ^C is where I interrupted the server in its terminal window. The program was spinning in a busy loop, using 99.5% of a CPU.
Client output
pid 474
begin loop 0
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 477
5.000000 6.000000 0.000000 9.000000
2.000000 1.000000 4.000000 7.000000
0.000000 8.000000 2.000000 5.000000
7.000000 8.000000 3.000000 2.000000
forke = 0 yani child 0
begin loop 1
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 480
0.000000 7.000000 0.000000 3.000000
0.000000 7.000000 2.000000 9.000000
2.000000 4.000000 5.000000 5.000000
9.000000 7.000000 3.000000 4.000000
forke = 0 yani child 0
begin loop 2
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 482
8.000000 6.000000 1.000000 4.000000
9.000000 2.000000 3.000000 9.000000
2.000000 3.000000 8.000000 0.000000
7.000000 8.000000 9.000000 8.000000
forke = 0 yani child 0
begin loop 3
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 484
1.000000 4.000000 2.000000 9.000000
5.000000 3.000000 5.000000 0.000000
5.000000 9.000000 7.000000 5.000000
3.000000 1.000000 7.000000 3.000000
forke = 0 yani child 0
begin loop 4
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 487
2.000000 3.000000 0.000000 4.000000
0.000000 2.000000 6.000000 8.000000
1.000000 8.000000 8.000000 7.000000
7.000000 4.000000 1.000000 4.000000
forke = 0 yani child 0
begin loop 5
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 489
6.000000 3.000000 4.000000 0.000000
5.000000 9.000000 2.000000 3.000000
9.000000 7.000000 5.000000 4.000000
5.000000 9.000000 1.000000 8.000000
forke = 0 yani child 0
begin loop 6
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 492
7.000000 1.000000 4.000000 6.000000
7.000000 1.000000 2.000000 4.000000
5.000000 1.000000 8.000000 6.000000
1.000000 9.000000 8.000000 6.000000
forke = 0 yani child 0
begin loop 7
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 495
0.000000 0.000000 5.000000 1.000000
6.000000 8.000000 3.000000 1.000000
8.000000 1.000000 6.000000 9.000000
7.000000 3.000000 1.000000 5.000000
forke = 0 yani child 0
begin loop 8
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 497
3.000000 0.000000 1.000000 8.000000
4.000000 4.000000 2.000000 3.000000
8.000000 9.000000 6.000000 3.000000
1.000000 3.000000 6.000000 3.000000
forke = 0 yani child 0
begin loop 9
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 500
4.000000 0.000000 9.000000 3.000000
1.000000 5.000000 1.000000 2.000000
3.000000 0.000000 5.000000 6.000000
4.000000 5.000000 9.000000 7.000000
forke = 0 yani child 0
begin loop 10
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 502
4.000000 9.000000 7.000000 6.000000
6.000000 4.000000 7.000000 1.000000
3.000000 5.000000 9.000000 0.000000
7.000000 9.000000 7.000000 9.000000
forke = 0 yani child 0
begin loop 11
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 504
3.000000 3.000000 0.000000 3.000000
7.000000 5.000000 8.000000 5.000000
6.000000 1.000000 3.000000 6.000000
8.000000 3.000000 3.000000 0.000000
forke = 0 yani child 0
begin loop 12
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 507
6.000000 5.000000 1.000000 8.000000
7.000000 3.000000 9.000000 7.000000
9.000000 4.000000 0.000000 5.000000
1.000000 6.000000 9.000000 3.000000
forke = 0 yani child 0
begin loop 13
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 513
6.000000 4.000000 6.000000 3.000000
5.000000 6.000000 6.000000 0.000000
2.000000 1.000000 5.000000 2.000000
7.000000 3.000000 9.000000 8.000000
forke = 0 yani child 0
begin loop 14
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 515
0.000000 3.000000 7.000000 8.000000
9.000000 1.000000 2.000000 4.000000
5.000000 7.000000 4.000000 8.000000
3.000000 1.000000 1.000000 6.000000
forke = 0 yani child 0
begin loop 15
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 517
0.000000 8.000000 9.000000 7.000000
7.000000 3.000000 3.000000 0.000000
4.000000 0.000000 8.000000 1.000000
5.000000 8.000000 4.000000 4.000000
forke = 0 yani child 0
begin loop 16
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 520
8.000000 3.000000 8.000000 8.000000
1.000000 7.000000 0.000000 3.000000
4.000000 2.000000 0.000000 3.000000
8.000000 4.000000 4.000000 8.000000
forke = 0 yani child 0
begin loop 17
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 522
9.000000 3.000000 9.000000 3.000000
3.000000 3.000000 2.000000 9.000000
9.000000 3.000000 6.000000 0.000000
8.000000 0.000000 8.000000 1.000000
forke = 0 yani child 0
begin loop 18
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 524
9.000000 1.000000 4.000000 3.000000
4.000000 8.000000 9.000000 2.000000
2.000000 5.000000 0.000000 6.000000
9.000000 0.000000 5.000000 7.000000
forke = 0 yani child 0
begin loop 19
signal sent
Open done
read done
close done
forke buyuktur >= parent olmali 526
2.000000 0.000000 5.000000 3.000000
1.000000 2.000000 1.000000 3.000000
3.000000 6.000000 5.000000 8.000000
7.000000 3.000000 2.000000 5.000000
forke = 0 yani child 0
Note that the client exited tidily after 20 loops; it should probably have been smaller.
Some of the changes
The code only calls srand() once
The code sets up the servers signal handler before messing with the FIFOs.
The code no longer deletes the FIFO in a loop.
The code no longer sets signalcheck to zero unless it was one. One problem seemed to be that the 'signalcheck` variable was zero whenever checked.
The code uses static volatile sig_atomic_t signalcheck.
The random matrix is generated before the fork() so that different results are seen.
The server cleans up dead children (zombies).
It really isn't clear why either the server or the child forks, but that was left more or less as found.
The signal handler doesn't use printf().
Improvements still needed
The code is far from perfect. There are multiple things that should be addressed, including:
Handling errors correctly (e.g. after fork() in server).
Mechanism to terminate server other than interrupt.
Detecting when client has stopped.
Not using signals to tell the server there is work.
Removing client forking.
Decide whether server really needs to fork.
Use configurable FIFO names.
Sort out matrix initialization better.
Etc.

SSE Matrix Multiplication NxN. Why is this error

I am beginner using SSE instructions, and I try to implement MMM. So, I implemented MMM using matriz 2by2, now I want to implement MMM using matrix NXN
#include <emmintrin.h>
#include <stdio.h>
#include <stdlib.h>
void simd_2x2(int lda, double *A, double *B, double *C)
{
__m128d a, b1, c1;
for (int k = 0; k < lda; k++) {
//printf("%f\n",C[k * lda]);
c1 = _mm_loadu_pd(C + k * lda); //load unaligned block in C
//c2 = _mm_loadu_pd(C + 1 * lda);
for (int i = 0; i < lda; ++i) {
a = _mm_load_pd(A + i * lda);//load aligned i-th column of A
b1 = _mm_load1_pd(B + i + k * lda); //load i-th row of B
//b2 = _mm_load1_pd(B + i + 1 * lda);
c1 = _mm_add_pd(c1, _mm_mul_pd(a, b1)); //rank-1 update
//c2 = _mm_add_pd(c2, _mm_mul_pd(a, b2));
}
_mm_storeu_pd(C + k * lda, c1); //store unaligned block in C
//_mm_storeu_pd(C + 1 * lda, c2);
}
}
int main() {
int n = 2;
double *buf = NULL;
buf = (double *)malloc(3 * n * n * sizeof(double));
double *A = buf + 0;
double *B = A + n * n;
double *C = B + n * n;
simd_2x2(n, A, B, C);
return 0;
}
When n=2 everything work fine:
A = 4.000000 3.000000
2.000000 4.000000
B = 1.000000 3.000000
2.000000 4.000000
C = 0.000000 0.000000
0.000000 0.000000
C = C + A * B = 10.000000 24.000000
10.000000 22.000000
but if n=4 I get the next:
A = 4.000000 0.000000 1.000000 4.000000
2.000000 1.000000 4.000000 0.000000
3.000000 1.000000 2.000000 1.000000
4.000000 1.000000 3.000000 1.000000
B = 1.000000 5.000000 9.000000 13.000000
2.000000 6.000000 10.000000 14.000000
3.000000 7.000000 11.000000 15.000000
4.000000 8.000000 12.000000 16.000000
C = 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000
C = C + A * B = 23.000000 59.000000 95.000000 131.000000
16.000000 44.000000 72.000000 100.000000
0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000
The last two rows are not calculated, why is this? Can someone help me?
I have almost 5 days reading about SSE but I can not fully understand and neither solve this problem.

LAPACK + C, weird behaviour

I am trying to solve a simple linear equations system using LAPACK. I use dbsvg method which is optimised for banded matrices. I've obsereved a realy strange behaviour. When I fill the AT matrix this way:
for(i=0; i<DIM;i++) AB[0][i] = -1;
for(i=0; i<DIM;i++) AB[1][i] = 2;
for(i=0; i<DIM;i++) AB[2][i] = -1;
for(i=0; i<3; i++)
for(j=0;j<DIM;j++) {
AT[i*DIM+j]=AB[i][j];
}
And call:
dgbsv_(&N, &KL, &KU, &NRHS, AT, &LDAB, myIpiv, x, &LDB, &INFO);
It works perfectly. However, when I do it this way:
for(i=0; i<DIM;i++) AT[i] = -1;
for(i=0; i<DIM;i++) AT[DIM+i] = 2;
for(i=0; i<DIM;i++) AT[2*DIM+i] = -1;
It results with a vector filled with NaN. Here are the declarations:
double AB[3][DIM], AT[3*DIM];
double x[DIM];
int myIpiv[DIM];
int N=DIM, KL=1, KU=1, NRHS=1, LDAB=DIM, LDB=DIM, INFO;
Any ideas?
You're not laying out the entries in the band storage properly; it was working before by a happy accident. The LAPACK docs say:
On entry, the matrix A in band storage, in rows KL+1 to
2*KL+KU+1; rows 1 to KL of the array need not be set.
The j-th column of A is stored in the j-th column of the
array AB as follows:
AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL)
On exit, details of the factorization: U is stored as an
upper triangular band matrix with KL+KU superdiagonals in
rows 1 to KL+KU+1, and the multipliers used during the
factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
See below for further details.
So if you want a tridiagonal matrix with 2 on the diagonal and -1 above and below, the layout should be:
* * * * * * * ... * * * *
* -1 -1 -1 -1 -1 -1 ... -1 -1 -1 -1
2 2 2 2 2 2 2 ... 2 2 2 2
-1 -1 -1 -1 -1 -1 -1 ... -1 -1 -1 *
LDAB should be 4 in this case. Bear in mind that LAPACK uses a column-major layout, so the actual array should be look like this in memory:
{ *, *, 2.0, -1.0, *, -1.0, 2.0, -1.0, *, -1.0, 2.0, -1.0, ... }
dgbsv was giving different results for the two identical arrays because it was reading off the ends of the arrays that you had laid out.
Is this the exact code you used or just an example? I ran this code here (just cut and pasted from your posts, with a change of AT to AT2 in the second loop:
const int DIM=10;
double AB[DIM][DIM], AT[3*DIM], AT2[3*DIM];
int i,j;
for(i=0; i<DIM;i++) AB[0][i] = -1;
for(i=0; i<DIM;i++) AB[1][i] = 2;
for(i=0; i<DIM;i++) AB[2][i] = -1;
for(i=0; i<3; i++)
for(j=0;j<DIM;j++) {
AT[i*DIM+j]=AB[i][j];
}
printf("AT:");
for (i=0;i<3*DIM;++i) printf("%lf ",AT[i]);
printf("\n\n");
for(i=0; i<DIM;i++) AT2[i] = -1;
for(i=0; i<DIM;i++) AT2[DIM+i] = 2;
for(i=0; i<DIM;i++) AT2[2*DIM+i] = -1;
printf("AT2:");
for (i=0;i<3*DIM;++i) printf("%lf ",AT2[i]);
printf("\n\n");
printf("Diff:");
for (i=0;i<3*DIM;++i) printf("%lf ",AT[i]-AT2[i]);
printf("\n\n");
and got this output
AT:-1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.0000
00 -1.000000 -1.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.0
00000 2.000000 2.000000 2.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.0000
00 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
AT2:-1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000
000 -1.000000 -1.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.
000000 2.000000 2.000000 2.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000
000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
Diff:0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.
000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0
.000000 0.000000 0.000000 0.000000
Apparently AT and AT2 are the same. Which I would expect.

Resources