Mandelbrot message queue blocking - C - c

I'm having problems in using message queues in this program
it is supposed to launch a number of processes that is passed by argument to the program by the command prompt but it only launches and calculates the points for one process... the others don't get to launch ..
Please help me .
This is the program that creates the message queues and outputs the selected data into pgm format
only the first process gets to run the other processes don't
can anybody tell me why ?
#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <stdlib.h>
#include <string.h>
void output_pgm(char *filename,double *buffer, int nx, int ny, double max) {
int i;
FILE *file;
file = fopen(filename,"w");
fprintf(file,"P2\n");
fprintf(file,"%d %d\n",nx,ny);
fprintf(file,"%d\n",(int)max);
for (i=0; i<nx*ny; i++) {
if (!(i%nx)) fprintf(file,"\n");
fprintf(file,"%d ",(int)buffer[i]);
}
fclose(file);
}
void main(int argc,char *argv[]) {
if(argc != 2) {
} else {
int n = atoi(argv[1]);
int i = 0;
struct msgbuf {
long mtype;
int x;
int y;
double value;
};
struct envio {
long mtype;
long type;
int ny;
double yM1;
double yM2;
};
key_t key = 123;
key_t key2 = 124;
int msgflg = IPC_CREAT | 0666;
int msqid = msgget(key,msgflg);
int msqid2 = msgget(key2,msgflg);
switch(fork()) {
case -1:
printf("Erro de fork");
break;
case 0:
printf("Oi: %d\n",n);
double *b;
int x,y,i,m;
double *ptr = b = malloc(1000*1000*sizeof(double));
printf("Chego(1)\n");
struct msgbuf a;
struct envio c;
size_t buflen = sizeof(a) - sizeof(long);
size_t len2 = sizeof(c) - sizeof(long);
printf("Chego(2)\n");
int msid = msgget(key,msgflg);
int msid2 = msgget(key2,msgflg);
printf("Chego(3)\n");
double aux = -1.0;
double multiplier = ((1.0/n) * 2);
c.mtype = 300;
int ny = (int)(1000/n);
for(i = 0; i < n; i++) {
c.type = (i+1);
c.ny = (int)(1000/n);
c.yM1 = aux;
c.yM2 = aux+multiplier;
printf("Chego aqui(2)\n");
if(msgsnd(msid2,&c,len2,0) < 0) {
perror("Erro do 1o envio\n");
}
}
for(m = 0; m < n; m++) {
printf("Entrei no ciclo(1)\n");
for(y = 0; y <ny ;y++) {
//printf("Chego(4)\n");
for(x = 0; x < 1000;x++) {
if(msgrcv(msid,&a,buflen,(long)(m+1),0) < 0) {
perror("Erro na recepcao:\n ");
}
//printf("Chego(5)\n");
b[a.y * ny + a.x] = a.value;
}
}
b = b + (1000/n)*1000;
}
output_pgm("mandel.pgm", ptr, 1000, 1000, 255);
//msgctl(msid, IPC_RMID, NULL);
//msgctl(msid2, IPC_RMID, NULL);
printf("Processo 1\n");
break;
default:
for(i = 0;i < n;i++) {
switch(fork()) {
case -1:
printf("Erro de fork");
case 0:
exit(0);
break;
default:
printf("Fui lancado\n");
execlp("/home/hyper/Documents/SO2/TP3-4/rec","rec",0);
break;
}
}
break;
}
}
};
The for loop in this program is run only once
#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <stdlib.h>
#include <string.h>
double type;
struct senbuf {
long mtype;
int x;
int y;
double value;
};
int max_iterations = 256;
double compute_point(double ci, double cr) {
int iterations = 0;
double zi = 0;
double zr = 0;
while ((zr*zr + zi*zi < 4) && (iterations < max_iterations)) {
double nr, ni;
/* Z <-- Z^2 + C */
nr = zr*zr - zi*zi + cr;
ni = 2*zr*zi + ci;
zi = ni;
zr = nr;
iterations ++;
}
return iterations;
}
/* The "compute" function computes the Mandelbrot function over every
point on a grid that is "nx" points wide by "ny" points tall, where
(xmin,ymin) and (xmax,ymax) give two corners of the region the
complex plane.
*/
void compute(int msqid,int nx, int ny, double xmin, double xmax,
double ymin, double ymax,long type2 ) {
double delta_x, delta_y;
int x, y;
struct senbuf sen;
delta_x = (xmax - xmin)/nx;
delta_y = (ymax - ymin)/ny;
size_t buflen = sizeof(sen) - sizeof(long);
for (y=0; y<ny; y++) {
//printf("Ja entrei aqui");
double y_value = ymin + delta_y * y;
for (x=0; x<nx; x++) {
double x_value = xmin + delta_x * x;
sen.mtype = type;
sen.x = x;
sen.y = y;
sen.value = compute_point(x_value,y_value);
if(msgsnd(msqid,&sen,buflen,0) < 0) {
perror("Erro no envio:");
};
// printf("%f",sen.a[y*nx + x]);
//buffer[y*nx + x] = compute_point(x_value, y_value);
}
}
printf("Ja mandei %d\n",type2);
/*sen.mtype=500;
sen.test = -1;
printf("Ja to a sair\n");
msgsnd(msqid,&sen,buflen,IPC_NOWAIT);*/
}
/* Output the data contained in the buffer to a Portable Greymap format
image file. The parameter "max" should be an upper bound for the
data values in the buffer.
*/
void output_pgm(char *filename,double *buffer, int nx, int ny, double max) {
int i;
FILE *file;
file = fopen(filename,"w");
fprintf(file,"P2\n");
fprintf(file,"%d %d\n",nx,ny);
fprintf(file,"%d\n",(int)max);
for (i=0; i<nx*ny; i++) {
if (!(i%nx)) fprintf(file,"\n");
fprintf(file,"%d ",(int)buffer[i]);
}
fclose(file);
}
int main()
{
int msqid;
int msqid2;
struct recep {
long mtype;
long type;
int ny;
double yM1;
double yM2;
};
struct recep a;
size_t len = sizeof(a) - sizeof(long);
key_t key = 124;
msqid = msgget(key, 0666);
if(msgrcv(msqid, &a, len, 300, 0) < 0) {
perror("Error checking");
};
printf("Dados :\n Tipo : %d\n Ny: %d\n,yM1 : %f\n yM2: %f\n",a.type,a.ny,a.yM1,a.yM2);
type = a.type;
printf("Vou iniciar o compute");
key_t key2 = 123;
msqid2 = msgget(key2,0666);
compute(msqid2,1000,a.ny, -1.0, 1.0,a.yM1,a.yM2,a.type);
}

In your first switch statement you create one child doing whatever it is doing. The parent falls into a second switch statement in which the child immediately exits and the parent is overlaid with the "rec" executable. The parent no longer executes at point - it is the program "rec". You are never going to execute more than one pass of the loop because the code that is executing is gone at that point. If you want multiple instances of "rec" running you should be using execlp on the children not the parent.
EDIT
There are two system calls wait and waitpid that provides various options. The simpler of these is wait and should be sufficient for what you are doing. Define and increment a counter in the parent for each child you create. Then instead of just exiting the parent you wait for all the children to finish. Something as simple as this should suffice:
for (int i = 0; i < counter; i++)
{
wait(NULL);
}

Related

MPI_SEND and MPI_RECIEVE have no Reference by Compile

I am trying to compile an MPI program with mpicc. The compiler complains only that there is no reference to MPI_RECIVE and MPI_SEND, and ends the compile error. I have #include in the .c file.
Can someone tell me how I can fix this?
Here ist the Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <mpi.h>
#include "random.h"
#include "md5tool.h"
/* horizontal size of the configuration */
#define XSIZE 1024
/* "ADT" State and line of states (plus border) */
typedef char State;
typedef State Line[XSIZE + 2];
/* determine random integer between 0 and n-1 */
#define randInt(n) ((int)(nextRandomLEcuyer() * n))
/* random starting configuration */
static void initConfig(Line *buf, int lines){
int x, y;
initRandomLEcuyer(424243);
for (y = 1; y <= lines; y++) {
for (x = 1; x <= XSIZE; x++) {
buf[y][x] = randInt(100) >= 50;
}
}
}
/* annealing rule from ChoDro96 page 34
* the table is used to map the number of nonzero
* states in the neighborhood to the new state
*/
static State anneal[10] = {0, 0, 0, 0, 1, 0, 1, 1, 1, 1};
/* a: pointer to array; x,y: coordinates; result: n-th element of anneal,
where n is the number of neighbors */
#define transition(a, x, y) \
(anneal[(a)[(y)-1][(x)-1] + (a)[(y)][(x)-1] + (a)[(y)+1][(x)-1] +\
(a)[(y)-1][(x) ] + (a)[(y)][(x) ] + (a)[(y)+1][(x) ] +\
(a)[(y)-1][(x)+1] + (a)[(y)][(x)+1] + (a)[(y)+1][(x)+1]])
/* treat torus like boundary conditions */
static void boundary(Line *buf, int lines){
int x,y;
for (y = 0; y <= lines+1; y++) {
/* copy rightmost column to the buffer column 0 */
buf[y][0 ] = buf[y][XSIZE];
/* copy leftmost column to the buffer column XSIZE + 1 */
buf[y][1+1] = buf[y][1 ];
}
for (x = 0; x <= XSIZE+1; x++) {
/* copy bottommost row to buffer row 0 */
buf[0][x ] = buf[lines][x];
/* copy topmost row to buffer row lines + 1 */
buf[lines+1][x] = buf[1][x ];
}
}
/* make one simulation iteration with lines lines.
* old configuration is in from, new one is written to to.
*/
//umschreiben
/**
static void simulate(Line *from, Line *to, int lines){
boundary(from, lines);
for (y = 1; y <= lines; y++) {
for (x = 1; x <= XSIZE; x++) {
to[y][x ] = transition(from, x , y);
}
}
}
*/
/* --------------------- measurement ---------------------------------- */
int main(int argc, char** argv){
int lines, its;
int i;
Line *from, *to, *temp, *next;
char* hash;
assert(argc == 3);
lines = atoi(argv[1]);
its = atoi(argv[2]);
from = malloc((lines + 2) * sizeof(Line));
to = malloc((lines + 2) * sizeof(Line));
MPI_Init(NULL, NULL);
// Get the number of processes
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
// Get the rank of the process
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
if(world_rank == 0){
int y;
next = malloc((lines + 2) * sizeof(Line));
initConfig(from, lines);
for (i = 0; i < its; i++) {
boundary(from, lines);
int z;
for(z = 0; z < world_size;z++){
if(z !=world_rank ){
MPI_SEND(from,(lines + 2) * sizeof(Line),MPI_CHARACTER,z,0,MPI_COMM_WORLD);
}
}
MPI_Status status;
for(z = 0; z < world_size;z++){
if(z !=world_rank ){
MPI_RECIVE(next,(lines + 2) * sizeof(Line),MPI_CHARACTER,z,1,&status);
if(status.MPI_ERROR){
//TODO
MPI_Abort(MPI_COMM_WORLD,1);
}
for (y = 1; y <= (lines%world_size+lines/world_size); y++) {
stpcpy(to[y*z],next[y*z]);
}
}
}
temp = from;
from = to;
to = temp;
}
hash = getMD5DigestStr(from[1], sizeof(Line) * (lines));
printf("hash: %s\n", hash);
free(next);
}else{
int x,y;
MPI_Status status;
for(i = 0; i < its; i++){
MPI_RECIVE(from,(lines + 2) * sizeof(Line),MPI_CHARACTER,0,0,&status);
if(status.MPI_ERROR){
MPI_Abort(MPI_COMM_WORLD,2);
}
for (y = 1; y <= (lines%world_size+lines/world_size); y++) {
for (x = 1; x <= XSIZE; x++) {
to[y*world_rank][x ] = transition(from, x , y*world_rank);
}
}
MPI_SEND(to,(lines + 2) * sizeof(Line),MPI_CHARACTER,0,1,MPI_COMM_WORLD);
}
}
MPI_Finalize();
free(from);
free(to);
free(hash);
return 0;
}
This is a C a Sequence implementation which I wrote for the university as a homework assignment.
Are you talking about MPI_Send and MPI_Recv ?
Don't know about any MPI_SEND or MPI_RECIV function...
I think you just mispelled them.
BTW: here is a great tutorial about how to use them http://mpitutorial.com/tutorials/mpi-send-and-receive/

Difference in behavior between clang and gcc?

I'm writing a C function to simulate a cache given an address trace. The function works as expected when compiled on my mac using gcc (really clang). gcc --version on my mac returns this:
Configured with: --prefix=/Applications/Xcode.app/Contents/Developer/usr --with-gxx-include-dir=/usr/include/c++/4.2.1
Apple LLVM version 8.1.0 (clang-802.0.42)
When I compile the same program on linux using gcc, the returns are way off, and eC & hC in my program (cache eviction counter and hit counter) are in the hundreds of thousands, when they should be below 10. When typing gcc --version on the linux machine, it returns this:
gcc (Ubuntu 4.9.3-8ubuntu2~14.04) 4.9.3
Here is the program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#include <getopt.h>
#include "cachelab.h"
typedef struct{
int v;
int t;
int LRU;
} block;
typedef struct{
block *blocks;
} set;
typedef struct{
set *sets;
} cache;
void simulate(int s, int E, int b, char* file, int* hC, int* mC, int* eC)
{
int numSets = (1 << s);
char operation;
int address;
int size;
int curTag;
int curSet;
int maxLRU = 0;
int curLRU = 0;
int check = 0;
cache c;
set *sets = malloc(sizeof(set) * numSets);
c.sets = sets;
int i = 0;
while(i < numSets)
{
c.sets[i].blocks = malloc(sizeof(block) * E);
for (int j = 0; j < E; j++)
{
c.sets[i].blocks[j].v = 0;
c.sets[i].blocks[j].t = INT_MIN;
c.sets[i].blocks[j].LRU = 0;
}
i++;
}
FILE *f = fopen(file, "r");
while(fscanf(f," %c %x,%d", &operation, &address, &size) != EOF)
{
check = 0;
curTag = ((unsigned int) address) >> (s+b);
curSet = (address >> b) & ((1 << s) - 1);
for (int i = 0; i < E; i++)
{
c.sets[curSet].blocks[i].LRU++;
if(c.sets[curSet].blocks[i].LRU >= maxLRU)
{
maxLRU = c.sets[curSet].blocks[i].LRU;
curLRU = i;
}
if(curTag == c.sets[curSet].blocks[i].t)
{
*hC = *hC + 1;
if (operation == 'M')
{
*hC = *hC + 1;
}
c.sets[curSet].blocks[i].LRU = 0;
check = 1;
}
}
if(check == 0)
{
for(int i = 0; i < E; i++)
{
if(c.sets[curSet].blocks[i].v == 0)
{
*mC = *mC + 1;
if (operation == 'M')
{
*hC = *hC + 1;
}
c.sets[curSet].blocks[i].v = 1;
c.sets[curSet].blocks[i].LRU = 0;
c.sets[curSet].blocks[i].t = curTag;
check = 1;
break;
}
}
}
if(check == 0)
{
*eC = *eC + 1;
*mC = *mC + 1;
if (operation == 'M')
{
*hC = *hC + 1;
}
c.sets[curSet].blocks[curLRU].t = curTag;
c.sets[curSet].blocks[curLRU].v = 1;
c.sets[curSet].blocks[curLRU].LRU = 0;
}
}
}
int main(int argc, char** argv)
{
int hitCount, missCount, evictionCount;
int s, E, b;
char *file;
char opt;
while((opt = getopt(argc,argv,"v:h:s:E:b:t:")) != -1)
{
switch(opt){
case 'v':
break;
case 'h':
break;
case 's':
s = atoi(optarg);
break;
case 'E':
E = atoi(optarg);
break;
case 'b':
b = atoi(optarg);
break;
case 't':
file = optarg;
break;
default:
exit(1);
}
}
simulate(s, E, b, file, &hitCount, &missCount, &evictionCount);
printSummary(hitCount, missCount, evictionCount);
return 0;
}
EDIT:
I understand that this is due to a difference between clang and gcc. Does anyone have any information about how I can go about fixing this discrepancy?
Here is cachelab.c:
/*
* cachelab.c - Cache Lab helper functions
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "cachelab.h"
#include <time.h>
trans_func_t func_list[MAX_TRANS_FUNCS];
int func_counter = 0;
/*
* printSummary - Summarize the cache simulation statistics. Student cache simulators
* must call this function in order to be properly autograded.
*/
void printSummary(int hits, int misses, int evictions)
{
printf("hits:%d misses:%d evictions:%d\n", hits, misses, evictions);
FILE* output_fp = fopen(".csim_results", "w");
assert(output_fp);
fprintf(output_fp, "%d %d %d\n", hits, misses, evictions);
fclose(output_fp);
}
/*
* initMatrix - Initialize the given matrix
*/
void initMatrix(int M, int N, int A[N][M], int B[M][N])
{
int i, j;
srand(time(NULL));
for (i = 0; i < N; i++){
for (j = 0; j < M; j++){
// A[i][j] = i+j; /* The matrix created this way is symmetric */
A[i][j]=rand();
B[j][i]=rand();
}
}
}
void randMatrix(int M, int N, int A[N][M]) {
int i, j;
srand(time(NULL));
for (i = 0; i < N; i++){
for (j = 0; j < M; j++){
// A[i][j] = i+j; /* The matrix created this way is symmetric */
A[i][j]=rand();
}
}
}
/*
* correctTrans - baseline transpose function used to evaluate correctness
*/
void correctTrans(int M, int N, int A[N][M], int B[M][N])
{
int i, j, tmp;
for (i = 0; i < N; i++){
for (j = 0; j < M; j++){
tmp = A[i][j];
B[j][i] = tmp;
}
}
}
/*
* registerTransFunction - Add the given trans function into your list
* of functions to be tested
*/
void registerTransFunction(void (*trans)(int M, int N, int[N][M], int[M][N]),
char* desc)
{
func_list[func_counter].func_ptr = trans;
func_list[func_counter].description = desc;
func_list[func_counter].correct = 0;
func_list[func_counter].num_hits = 0;
func_list[func_counter].num_misses = 0;
func_list[func_counter].num_evictions =0;
func_counter++;
}
You forgot to initialize the counters and flags so they start at undefined values. The following lines:
int hitCount, missCount, evictionCount;
int s, E, b;
should be:
int hitCount = 0, missCount = 0, evictionCount = 0;
int s = 0, E = 0, b = 0;
It just happens that the initial values happen to be lower on the mac so you're not getting correct results on the mac either (at least not guaranteed since the initial value is undefined).

Efficient way to find rows with same elements in a 3D matrix in C

I have a 3D matrix mat[100][100][100]. What is the efficient way to find a row with same elements that appears in mat[0][][], mat[1][][],....,mat[99][][]?
A simple approach would be comparing each row of mat[0][][] to all rows of the remaining 99 matrices, but it wouldn't be very efficient(I guess). Is there a better way to do it?
To expand on the comment by #chux, the first step is to compute a hash value for each row of each matrix. That's 10000 hash values in all. The results should be stored in an array of 10000 structs.
struct info
{
int m; // the matrix number
int row; // the row number
uint32_t hash; // the hash value for mat[m][row]
};
static struct info hashArray[10000];
After filling in all 10000 entries of the hashArray, sort the array by hash value. Then you can simply scan the array to find any duplicate hash values. When you do find duplicates, you need to confirm by comparing the row elements.
I finally found some time to write the content addressable code. It turns out to be much faster than using hash tables. But the catch is that the code is way more complex and the program takes WAY more memory. My final opinion is that unless you really need the extra speed, stick with the hash table.
Some examples of test runs are given below. The argument to the program specify the number of unique rows. The program fills the rest with randomly chosen existing rows. Then the rows are shuffled. The program looks for all duplicate rows and reports the number of duplicate rows and the time it took for both hash and content addressable tables.
bing#mint5 ~ $ cc -O2 cattest.c -o cattest
bing#mint5 ~ $ ./cattest 500
CAT Test 9500 0.0083
Hash Test 9500 0.0499
bing#mint5 ~ $ ./cattest 5000
CAT Test 5000 0.0195
Hash Test 5000 0.1477
bing#mint5 ~ $ ./cattest 9000
CAT Test 1000 0.0321
Hash Test 1000 0.1092
/* content addressable table vs hash table */
/* written by Bing H Bang */
/* I DONOT give permission to any snot-nosed students to copy my work and turn it in
as their own */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <pthread.h>
#include <errno.h>
#include <string.h>
#include <sys/time.h>
#include <sys/sysinfo.h>
double etime()
{
struct timeval tv;
double dt, df;
gettimeofday(&tv, NULL);
dt = (double)(tv.tv_sec);
df = ((double)(tv.tv_usec))/1000000.0;
return(dt+df);
}
struct CAT_entry
{
unsigned fval;
unsigned short rows[10000];
unsigned short num;
unsigned short used;
struct CAT_entry *next;
} *CAT[256] = {NULL};
struct CAT_entry stmem[10000];
int stidx = 0;
unsigned dat[100][10000];
char map[10000];
unsigned hasht[10000];
#define highbit (1 << ((sizeof(unsigned)*8)-1))
unsigned
rotxor(unsigned sum, unsigned v)
{
if((sum & highbit) == 0)
return ((sum << 1) ^ v);
else
return (((sum << 1) | 1) ^ v);
}
unsigned
compute_hash(int y)
{
int x;
unsigned sum = 0;
for(x = 0; x < 100; ++x)
sum = rotxor(sum, dat[x][y]);
return sum;
}
void
mk_hasht()
{
int y;
for(y = 0; y < 10000; ++y)
hasht[y] = compute_hash(y);
}
clearmap()
{
memset((void *)map, 0, 10000);
}
comprow(int y, int yd)
{
int x;
for(x = 0; x < 100; ++x)
if(dat[x][y] != dat[x][yd])
return 0;
return 1;
}
struct CAT_entry **
srch_CAT(unsigned value)
{
struct CAT_entry **p = &(CAT[value&255]);
static struct CAT_entry *r = NULL;
while(*p != NULL)
{
if((*p)->fval == value)
break;
if((*p)->fval > value)
return &r;
else
p = &((*p)->next);
}
return p;
}
void
add_entry(int y, unsigned value)
{
struct CAT_entry **p = &(CAT[value&255]), *q;
while(*p != NULL)
{
q = *p;
if(q->fval == value)
{
q->rows[q->num] = y;
q->num++;
return;
}
if(q->fval > value)
break;
p = &(q->next);
}
q = *p;
//*p = malloc(sizeof(struct CAT_entry));
*p = &stmem[stidx++];
(*p)->next = q;
q = *p;
q->fval = value;
q->num = 0;
q->used = 0;
}
void
mk_CAT()
{
int x,y;
struct CAT_entry **p, *q;
for(y = 0; y < 10000; y++)
add_entry(y, dat[0][y]);
for(x=0; x < 256; ++x)
{
p = &(CAT[x]);
while(*p != NULL)
{
q = *p;
if(q->num == 0)
{
*p = q->next;
//free(q);
}
else
p = &(q->next);
}
}
}
void
gen_data(int npat)
{
int x, y, rnum, limit;
unsigned r;
srandom(time(NULL));
rnum = npat * 100;
for(y = 0; y < rnum; ++y)
dat[y%100][y/100] = random();
for(y = npat; y < 10000; ++y)
{
rnum = random() % npat;
for(x = 0; x < 100; ++x)
dat[x][y]=dat[x][rnum];
}
for(y = 0; y < 10000; ++y)
{
rnum = random() % 10000;
if(rnum == y)
continue;
for(x = 0; x < 100; ++x)
{
r = dat[x][y];
dat[x][y]=dat[x][rnum];
dat[x][rnum] = r;
}
}
}
int
do_CATtest()
{
int y, yd, count = 0, i;
struct CAT_entry **p, *q;
mk_CAT();
clearmap();
for(y = 0; y < 9999; ++y)
{
if(map[y] == 0)
{
map[y] = 1;
if(*(p = srch_CAT(dat[0][y])) != NULL)
{
for(q = *p, i = 0; i < q->num; ++i)
{
yd = q->rows[i];
if(map[yd] == 0)
{
if(comprow(y, yd))
{
map[yd] = 1;
++count;
q->used++;
}
}
}
if(q->num <= q->used)
*p = q->next;
}
}
}
return count;
}
int
do_hashtest()
{
unsigned h;
int x, y, yd, count = 0;
mk_hasht();
clearmap();
for(y = 0; y < 9999; ++y)
{
if(map[y] != 0)
continue;
map[y] = 1;
h = hasht[y];
for(yd = y+1; yd < 10000; ++yd)
{
if(map[yd] != 0)
continue;
if(h == hasht[yd])
if(comprow(y, yd))
{
map[yd] = 1;
++count;
}
}
}
return count;
}
main(int c, char *v[])
{
int npat = 0, count;
double t1, t2;
if(c == 2)
npat = atoi(v[1]);
if(npat <= 0 || npat >= 10000)
{
puts("input param error");
exit(1);
}
gen_data(npat);
npat = 10000 - npat;
t1 = etime();
if((count = do_CATtest()) != npat)
{
printf("CAT test error, %d matches found, not %d", count, npat);
exit(1);
}
t2 = etime();
printf("CAT Test %d %.4f\n", npat, t2-t1);
t1 = etime();
if((count = do_hashtest()) != npat)
{
printf("hash test error, %d matches found, not %d", count, npat);
exit(1);
}
t2 = etime();
printf("Hash Test %d %.4f\n", npat, t2-t1);
}
Make a content addressable table of the first values in each row. Then go through each row, take the first value and look it up on the table. If the lookup returns multiple rows, then those rows should be checked for a match. The searched rows should be remembered as to increase efficiency because the checked rows need not be checked again. You'll end up with a list of identical row groupings.

How do I broadcast a 2D array to all processes such that it can be accessed by a function in each rank?

I am totally new to MPI, therefore, could somebody please tell me what I am doing wrong in the following code? I have constructed this code basing on an answer that I found on stackoverflow, so I just modified it a little to suit my needs. The problem I am facing is that a 2D array "A" created in process 0 cannot be seen by the other processes of rank 1,2, and 3 (I have only four processors). That is, when a process wants to use this matrix "A" in a function matrix_element(), i.e. (the place where this occurs is indicated by stars *)
cc[m][n]=matrix_element(m,n,x,ngauher,A)
called by a process of rank higher than 0, the program terminates with a segmentation fault. Only the root process 0 is able to use this array to produce subarrays cc[m][n] inside each process. I have tried to broadcast the 2D array "A" to all processes using
for(m=0;m<MAX_M;m++) {
MPI_Bcast(A[m],MAX_M,MPI_DOUBLE,0,MPI_COMM_WORLD);
}
so that it can be used by each process, but the code terminates then even earlier with an error having to do with MPI_Barrier called later on in the code. I just want to know how to broadcast "A" to all processes correctly and if there is anything wrong with the call of "A" inside the matrix_element() function to be used by all processes. Anyways, the code follows
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "nrutil.h"
#define SQR(x) ((x)*(x))
double x[1000],w[1000];
double result,global_result;
double **A,**c,**cc;
long nrl,nrh,ncl,nch;
int i,j,MAX_M,n,m,k;
int mx,my,mz,nx,ny,nz;
int ngauher;
int lstart,lend,id,p,num_elements;
int count;
FILE *ingau,*inc,*mdat,*outptr_A;
FILE *globalarrayptr=NULL;
FILE *globalarrayptr2=NULL;
FILE *localarrayptr=NULL;
char *globalarray;
char *localarray;
int malloc2d(double ***array, int n, int m);
double **dmatrix(long nrl, long nrh, long ncl, long nch);
double matrix_element(int m, int n, double **A);
double hermitef(double u, int m);
double calculate_c(int m,int n, double *x, double *w, int ngauher);
double *h;
int main(int argc, char **argv) {
const int MAX_M=10;
const int procMAX_M=2;
const int ngauher=20;
int i,j,k,id,p,disp;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&id);
MPI_Comm_size(MPI_COMM_WORLD,&p);
if((localarrayptr = fopen("a.dat","w")) == NULL) {
fprintf(stderr,"Could not open file a.dat\n");
exit(1);
}
if(id == 0) {
if((ingau = fopen("gauspnts.dat","r")) == NULL) {
fprintf(stderr,"Could not open file gauspnts.dat");
exit(1);
}
if((globalarrayptr = fopen("c.dat","w")) == NULL) {
fprintf(stderr,"Could not open file c.dat\n");
exit(1);
}
printf(" opened files \n");
h = (double *)calloc(MAX_M,sizeof(double));
nrl=0;
nrh=MAX_M;
ncl=0;
nch=MAX_M;
malloc2d(&c,MAX_M,MAX_M);
malloc2d(&A,MAX_M,MAX_M);
for(i=0;i<nrh;i++) {
for(j=0;j<nch;j++) {
c[i][j]=0.0;
fprintf(globalarrayptr," %g ",c[i][j]);
}
fprintf(globalarrayptr,"\n");
}
for(k=0;k<ngauher;k++) {
fscanf(ingau," %lf %lf \n",&x[k],&w[k]);
printf(" %g %g \n",x[k],w[k]);
}
// MPI_Bcast(x,ngauher,MPI_DOUBLE,0,MPI_COMM_WORLD); // doesn't work
// MPI_Bcast(w,ngauher,MPI_DOUBLE,0,MPI_COMM_WORLD); // doesn't work
/* The problematic array is created right here in rank 0*/
for(m=0;m<MAX_M;m++) {
for(n=0;n<MAX_M;n++) {
A[m][n]=calculate_c(m,n,x,w,ngauher);
printf(" rank=%d A[%d][%d] = %g \n",i,m,n,A[m][n]);
}
}
}
for(m=0;m<MAX_M;m++) {
MPI_Bcast(A[m],MAX_M,MPI_DOUBLE,0,MPI_COMM_WORLD); // doesn't work and
// causes a seg fault
}
nrl=0;
nrh=MAX_M/procMAX_M;
ncl=0;
nch=MAX_M/procMAX_M;
malloc2d(&cc,MAX_M/procMAX_M,MAX_M/procMAX_M);
int sizes[2] = {MAX_M,MAX_M};
int subsizes[2] = {MAX_M/procMAX_M,MAX_M/procMAX_M};
int starts[2] = {0,0};
MPI_Datatype type, subarrtype;
MPI_Type_create_subarray(2,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&type);
MPI_Type_create_resized(type,0,MAX_M/procMAX_M*sizeof(double),&subarrtype);
MPI_Type_commit(&subarrtype);
double *globalptr=NULL;
if(id == 0) globalptr=&(c[0][0]);
int sendcounts[procMAX_M*procMAX_M];
int displs[procMAX_M*procMAX_M];
if(id == 0) {
for(i=0; i<procMAX_M*procMAX_M;i++) sendcounts[i] = 1;
disp = 0;
for(i=0;i<procMAX_M;i++) {
for (j=0; j<procMAX_M; j++) {
displs[i*procMAX_M+j] = disp;
disp += 1;
}
disp += ((MAX_M/procMAX_M)-1)*procMAX_M;
}
}
if (id == 0) {
for(i=0; i<procMAX_M*procMAX_M; i++) sendcounts[i] = 1;
disp = 0;
for(i=0; i<procMAX_M; i++) {
for (j=0; j<procMAX_M; j++) {
displs[i*procMAX_M+j] = disp;
disp += 1;
}
disp += ((MAX_M/procMAX_M)-1)*procMAX_M;
}
}
MPI_Scatterv(globalptr,sendcounts,displs,subarrtype,&(cc[0][0]),
MAX_M*MAX_M/(procMAX_M*procMAX_M),
MPI_DOUBLE,0,MPI_COMM_WORLD);
/* all processors print their local data */
for(i=0;i<p;i++) {
if(id == i) {
printf("Local process on rank %d is:\n", id);
for(m=0;m<MAX_M/procMAX_M;m++) {
putchar('|');
for (n=0; n<MAX_M/procMAX_M;n++) {
// if(id == 0) { j=m; k=n; }
// if(id == 1) { j=m+procMAX_M-1; k=n; }
// if(id == 2) {j=m; k=n+procMAX_M-1; }
// if(id == 3) {j=m+procMAX_M-1; k=n+procMAX_M-1; }
/* ************************************************************************* */
cc[m][n]=matrix_element(m,n,A); // THE PROBLEM WITH "A" ARISES HERE!!
/* ************************************************************************* */
printf(" cc[%d][%d] = %g id = %d\n",m,n,cc[m][n],id);
}
printf("|\n");
}
}
MPI_Barrier(MPI_COMM_WORLD);
}
/* it all goes back to process 0 */
MPI_Gatherv(&(cc[0][0]),MAX_M*MAX_M/(procMAX_M*procMAX_M),MPI_DOUBLE,
globalptr,sendcounts,displs,subarrtype,0, MPI_COMM_WORLD);
/* don't need the local data anymore */
/* or the MPI data type */
MPI_Type_free(&subarrtype);
if (id == 0) {
if((globalarrayptr2 = fopen("B.dat","w")) == NULL) {
fprintf(stderr,"Could not open file B.dat\n");
exit(1);
}
for(i=0;i<MAX_M;i++) {
for(j=0;j<MAX_M;j++) {
fprintf(globalarrayptr2," %g ",c[i][j]);
}
fprintf(globalarrayptr2,"\n");
}
}
MPI_Finalize();
return 0;
}
int malloc2d(double ***array, int n, int m) {
double *p;
/* allocate the n*m contiguous items */
p = (double *)malloc(n*m*sizeof(double));
if (!p) return -1;
/* allocate the row pointers into the memory */
(*array) = (double **)malloc(n*sizeof(double *));
if (!(*array)) {
free(p);
return -1;
}
/* set up the pointers into the contiguous memory */
for (i=0; i<n; i++)
(*array)[i] = &(p[i*m]);
return 0;
}
double calculate_c(int m, int n, double *x, double *w, int ngauher) {
double result=0;
int i;
for(i=0;i<ngauher;i++) {
result+=w[i]*exp(-SQR(x[i]))*SQR(hermitef(x[i],m)*hermitef(x[i],n));
}
return(result);
}
double hermitef(double u, int m) {
int j;
double x,pi;
pi=acos(-1.);
x=u;
h[0]=1./pow(pi,0.25);
h[1]=sqrt(2.)*x/pow(pi,0.25);
for(j=2;j<m+1;j++) {
h[j] = sqrt(2./(double)j)*x*h[j-1]-sqrt((double)(j-1)/((double)j))*h[j-2];
}
return(h[m]);
}
double matrix_element(int m, int n, double **A) {
result=0.;
/* In this function, A is seen only by the root process ?? */
for(mx=0;mx<=m;mx++) {
for(my=0;my<=m;my++) {
for(mz=0;mz<=m;mz++) {
for(nx=0;nx<=n;nx++) {
for(ny=0;ny<=n;ny++) {
for(nz=0;nz<=n;nz++) {
if(((mx+my+mz == m) && (nx+ny+nz == n))) {
result+=A[mx][nx]*A[my][ny]*A[mz][nz];
}
}
}
}
}
}
}
return(result);
}
If you want to MPI_Bcast() you array to every process, your array should be allocated size times, one for each process. Allocating on rank 0 is not enough. The problem is :
if(id==0){
...
malloc2d(&A,MAX_M,MAX_M);
...
}
Try to get the malloc2d(&A,MAX_M,MAX_M); out of this test and the MPI_Bcast() will work fine.
Notice that since you allocated your 2D array to have values contiguous in memory, you can use
MPI_Bcast(A[0],MAX_M*MAX_M,MPI_DOUBLE,0,MPI_COMM_WORLD);
Bye,

MPI_AllGather not gather properly...all elements end up the same value?

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <time.h>
#define h 1
#define XY0 0
#define MAX_XY 5
#define N 2 //particles per subdomain
#define BLOCKS 4
#define a 1
#define b 1
float velocityX(float x, float y);
float velocityY(float x, float y);
int malloc2dfloat(float ***array, int length);
int main (int argc, char **argv)
{
typedef struct {
float xcoord;
float ycoord;
float velx;
float vely;
} particle;
int points= (int) floor((MAX_XY - XY0)/h) + 1;
int procsize = 2;
int myid, nproc;
MPI_Datatype particletype, oldtypes[1];
MPI_Aint offset[1], extent;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
int startElementX, startElementY, endElementX, endElementY;
particle* sub_pars = (particle*)malloc(sizeof(particle)*N);
offset[0] = 0;
int blockcounts[1];
blockcounts[0] = 4;
oldtypes[0] = MPI_FLOAT;
MPI_Type_struct(1, blockcounts, offset, oldtypes, &particletype);
MPI_Type_commit(&particletype);
particle* particles = (particle*)malloc(sizeof(particle) * N * procsize*procsize);
if (nproc != procsize*procsize){
printf("Must use np=4 -- split into 4 blocks");
MPI_Abort(MPI_COMM_WORLD,1);
}
srand(time(NULL)+myid);
if (myid == 0)
{
float mins[4];
startElementX = 0;
startElementY = 0;
endElementX = (points/procsize)-1;
endElementY = (points/procsize) -1;
}
else if (myid == 1)
{
startElementX = 0;
startElementY = (points/procsize);
endElementX = (points/procsize) -1;
endElementY = points - 1;
}
else if (myid == 2)
{
startElementX = (points/procsize);
startElementY = 0;
endElementX = points - 1;
endElementY = (points/procsize) -1;
}
else
{
startElementX = (points/procsize);
startElementY = (points/procsize);
endElementX = points-1;
endElementY = points-1;
}
int i;
float localmin;
float mag;
for (i=0; i<N; i++)
{
sub_pars[i].xcoord = ((startElementX + rand()/(RAND_MAX / (endElementX-startElementX+1)+1)))*h + XY0;
printf("%f\n", sub_pars[i].xcoord);
sub_pars[i].ycoord = ((startElementY + rand()/(RAND_MAX / (endElementY-startElementY+1)+1)))*h + XY0;
sub_pars[i].velx = velocityX(sub_pars[i].xcoord, sub_pars[i].ycoord);
sub_pars[i].vely = velocityY(sub_pars[i].xcoord, sub_pars[i].ycoord);
mag = sqrt(sub_pars[i].velx*sub_pars[i].velx + sub_pars[i].vely*sub_pars[i].vely);
if (i==0 || localmin > mag) localmin = mag;
}
printf("localmin of %d is %.2f \n", myid, localmin);
MPI_Allgather(&sub_pars, 1, particletype, particles ,1, particletype, MPI_COMM_WORLD);
MPI_Finalize();
if(myid == 0)
{
int k;
for (k=0; k<N*4; k++)
{
printf("test %.2f \n", particles[i].xcoord);
}
}
return 0;
}
float velocityX(float x, float y)
{
float temp = (a+(b*(y*y-x*x))/((x*x+y*y)*(x*x+y*y)));
return temp;
}
float velocityY(float x, float y)
{
float temp = (-1*(2*b*x*y)/((x*x+y*y)*(x*x+y*y)));
return temp;
}
It just returns the same value for all the particles, but I know they are being calculate correctly within each thread, so something is wrong with my MPI_Allgather, can someone please explain how it should look?
You have made a very common mistake: the & (address-of) operator in the first argument that you pass to MPI_Allgather is unnecessary. sub_pars is already a pointer and calling MPI_Allgather with &sub_pars passes a pointer to the pointer (a location somewhere in the stack frame of the main() routine) instead of pointer to the actual data.

Resources