Optimizing performance - implementing custom algorithm on graphs - c

For my university assignment I need to come up with an algorithm of finding a spanning tree with maximum number of edges with same weight. The description of the task can be found here: Find a spanning tree with maximum number of edges with same weight . There you can also see the upvoted solution (suggested by #mrip) that I've implemented in C language.
I have tested the code on my local machine and it gives correct outputs on different data sets. However, when I upload the solution to the evaluation system, I see that the program's completion time is up to 3 times longer than the reference time.
Here are two files that are in the project. I added detailed comments, of course:
header.h
//struct for subsets used in MST Kruskal algoritm
typedef struct subset {
int parent;
int rank;
} subset_t, *subset_p;
//struct for storing graph edges
typedef struct edges {
int src;
int dest;
int weight;
} edges_t, *edges_p;
//struct for storing weights and number of their occuriences
typedef struct weights {
int weight;
int occurCount;
} weights_t, *weights_p;
//struct to store all built trees
typedef struct trees {
int totalWeight; // total tree weight
int mostOccurNumber; // highest number of repeated edges for a tree
} trees_t, *trees_p;
//find and union function prototypes
int find(struct subset subsets[], int i);
void Union(struct subset subsets[], int x, int y);
Source Code.cpp
#include <stdio.h>
#include <stdlib.h>
#include "header.h"
//find function used in Kruskals algorithm
int find(subset_p subsets, int i) {
if (subsets[i].parent != i) {
subsets[i].parent = find(subsets, subsets[i].parent);
}
return subsets[i].parent;
}
//union function used in Kruskals algorithm
void Union(subset_p subsets, int x, int y) {
int xroot = find(subsets, x);
int yroot = find(subsets, y);
if (subsets[xroot].rank < subsets[yroot].rank) {
subsets[xroot].parent = yroot;
} else if (subsets[xroot].rank > subsets[yroot].rank) {
subsets[yroot].parent = xroot;
} else {
subsets[yroot].parent = xroot;
subsets[xroot].rank++;
}
}
//compare function used in qsort(). Sorts all edges by ascending weight
int myComp1 (const void *a, const void *b)
{
const edges_t * ptr_a = (const edges_t *)a;
const edges_t * ptr_b = (const edges_t *)b;
if (ptr_a->weight < ptr_b->weight) return -1;
if (ptr_a->weight > ptr_b->weight) return 1;
return 0;
}
//Sorts all present weights by descending number of occuriences in the MST
int myComp2 (const void *a, const void *b)
{
const weights_t * ptr_a = (const weights_t *)a;
const weights_t * ptr_b = (const weights_t *)b;
if (ptr_a->occurCount > ptr_b->occurCount) return -1;
if (ptr_a->occurCount < ptr_b->occurCount) return 1;
return 0;
}
//Sorts all present MSTs primarily by descending number of same-weight occuriences
//Secondly by ascending weights
int myComp3 (const void *a, const void *b)
{
const weights_t * ptr_a = (const weights_t *)a;
const weights_t * ptr_b = (const weights_t *)b;
int diff = ptr_b->occurCount - ptr_a->occurCount;
if (diff == 0) {
if (ptr_a->weight < ptr_b->weight) {
diff = -1;
} else if (ptr_a->weight > ptr_b->weight) {
diff = 1;
} else diff = 0;
}
return diff;
}
int main() {
//number of vertices and edges for a graph
int num_vertices, num_edges;
scanf("%d%d", &num_vertices, &num_edges);
// struct to keep all graph edges
edges_p allEdges = (edges_p)malloc(num_edges*sizeof(edges_t));
//input variables for source vertex, destanation vertex and weight of the edge
int curr_src, curr_dest, curr_weight;
//array to store all present (different!) weight values
int * weights = (int *)malloc(num_edges*sizeof(int));
//a variable to store number of elements in 'weights' array - number of different weight values in a graph
int newWeightIndex = 0;
//inputing data about graph edges: source vertex, destination vertex, weight
for (int i = 0; i < num_edges; i++) {
scanf("%d%d%d", &curr_src, &curr_dest, &curr_weight);
//filling array of structs with input info
allEdges[i].src = curr_src - 1;
allEdges[i].dest = curr_dest - 1;
allEdges[i].weight = curr_weight;
//'Weights' array contains all weights that are present in a graph.
//Here we decide whether we should put current weight value into an array.
bool alreadyHasWeight = 0;
for (int j = 0; j < i; j++) {
if (weights[j] == curr_weight) {
alreadyHasWeight = 1;
break;
}
}
if (alreadyHasWeight == 0) {
weights[newWeightIndex] = curr_weight;
newWeightIndex++;
}
}
// end of data input
//an array of structs to store info about build MSTs (the weight of MST and maximum number of edges with same weights)
trees_p myTrees = (trees_p)malloc(newWeightIndex * sizeof(trees_t));
//Kruscal Algoritm lopp to find an MST for all present weights.
//We take each weight in 'weights' and change the weight of every edge in a graph that has weight equal to 'weights[i]' to -1
for (int i = 0; i < newWeightIndex; i++) {
int minimizedWeight = weights[i];
//array to store subsets of vertices
subset_p subsets = (subset_p)malloc(num_vertices * sizeof(subset_t));
//array to store MST Edges
edges_p mstEdges = (edges_p)malloc(num_vertices*sizeof(edges_t));
//array to store current edge
edges_p currentEdge = (edges_p)malloc(sizeof(edges_t));
//variable to keep the amount of weight that was subtracted (when setting some weights to -1)
//this is done in order to restore default weights after MST build finishes
int subtractedWeight = 0;
//variable to keep the number of edges which weight was changed to -1
int infEdgesTotal = 0;
//variable to keep the number of edges which weight was changed to -1 included to MST
int infEdgesTaken = 0;
//setting minimum weights
for (int i = 0; i < num_edges; i++) {
if (allEdges[i].weight == minimizedWeight) {
allEdges[i].weight = -1;
subtractedWeight += minimizedWeight+1;
infEdgesTotal++;
}
}
//sorting all graph edges in ascending order
qsort(allEdges, num_edges, sizeof(edges_t), myComp1);
//the kruskal algoritm itself - BEGINNING
for (int v = 0; v < num_vertices; v++) {
subsets[v].parent = v;
subsets[v].rank = 0;
}
int e = 0;
int currentIndex = 0;
int mstWeight = 0;
int mstEdgesCount = 0;
while (e < num_vertices - 1) {
currentEdge[0].src = allEdges[currentIndex].src;
currentEdge[0].dest = allEdges[currentIndex].dest;
currentEdge[0].weight = allEdges[currentIndex].weight;
int x = find(subsets, currentEdge[0].src);
int y = find(subsets, currentEdge[0].dest);
currentIndex++;
if (x != y) {
mstEdges[e].src = currentEdge[0].src;
mstEdges[e].dest = currentEdge[0].dest;
mstEdges[e].weight = currentEdge[0].weight;
mstWeight += mstEdges[e].weight;
mstEdgesCount++;
if (mstEdges[e].weight == -1) {
infEdgesTaken++;
}
e++;
Union(subsets, x, y);
}
}
free(subsets);
//the kruskal algoritm itself - END
//Restoring default weights
for (int i = 0; i < num_edges; i++) {
if (allEdges[i].weight == -1) {
allEdges[i].weight += minimizedWeight+1;
}
}
//Calculating built MST weight
mstWeight += subtractedWeight/infEdgesTotal*infEdgesTaken;
//an array to store all weight values in MST and a number of edges in MST with that weight
weights_p myWeights = (weights_p)malloc(mstEdgesCount*sizeof(weights_t));
//a variable to store the number of different weight values in MST
int num_weights = 0;
//filling 'myWeights' array
for(int i = 0; i < mstEdgesCount; i++) {
myWeights[i].weight = -100;
}
for (int i = 0; i < mstEdgesCount; i++) {
for (int j = 0; j < i + 1; j++) {
if (myWeights[j].weight == -100) {
myWeights[j].weight = mstEdges[i].weight;
myWeights[j].occurCount = 1;
num_weights++;
break;
} else if (myWeights[j].weight != mstEdges[i].weight){
continue;
} else {
myWeights[j].occurCount++;
break;
}
}
}
free(currentEdge);
//sorting all present weights by descending number of edges with that weight
qsort(myWeights, num_weights, sizeof(weights_t), myComp2);
//a variable to store a maximum number of weight occuriences in MST
int mostOccs = myWeights[0].occurCount;
free(myWeights);
free(mstEdges);
//adding info about current MST into 'myTrees' array
myTrees[i].totalWeight = mstWeight;
myTrees[i].mostOccurNumber = mostOccs;
}
// End of Krushkal Algorithm iteration
free(weights);
free(allEdges);
//sorting 'myTrees' array to get an MST with maximum number of same-edge occuriences
//and lowest weight in the top
qsort(myTrees, newWeightIndex, sizeof(trees_t), myComp3);
//outputing the result
printf ("%d",myTrees[0].totalWeight);
free(myTrees);
system("pause");
return 0;
}
Now there seems to be too many loops, but honestly, I don't know how I can simplify the algorithm even more.
I really need some suggestions about how to enhance the performance of this solution. May be there are some obvious things I can't see.
Thank you in advance!

Related

Optimization of program processing structured input for large data

I have this one task. To make it clearer, I am going to use the picture below as an example. Input and output are separated by a dotted line. The first line of input is the number N - the number of sets. For every set, its first line contains 2 numbers - the first one declares how many numbers I am going to process and the second one is the number of intervals. The second line specifies the numbers to process, and each following line contains 2 numbers X and Y, which create an interval. For every interval I have to output 3 numbers - the lowest number in the interval, the index of the highest number in the interval, and the XOR of all numbers in the interval. Everything is running fine, except that it is really slow for big data and I have no idea how to make it work faster. I have attached my code and a large data input as well.
input.txt
#include <stdio.h>
#include <stdlib.h>
typedef struct {
int id;
int index;
} Censor;
int Xor(const int x, const int y, const Censor array[]) {
int xor = array[x].id;
if (x == y) {
return xor;
}
for (int i = x + 1; i <= y; i++) {
xor ^= array[i].id;
}
return xor;
}
int int_cmp(const void *a, const void *b) {
const Censor *ia = (const Censor *)a;
const Censor *ib = (const Censor *)b;
return (ia->id - ib->id);
}
int LowestId(const int x, const int y, Censor array[]) {
int id = array[x].id;
if (x == y) {
return id;
}
qsort(array, y - x + 1, sizeof(Censor), int_cmp);
return array[0].id;
}
int HighestIdIndex(const int x, const int y, Censor array[]) {
int index = array[x].index;
if (x == y) {
return index;
}
qsort(array, y - x + 1, sizeof(Censor), int_cmp);
return array[y].index;
}
int main() {
int t, n, q, b, e;
int max = 100;
int count = 0;
int *output = (int *)malloc(max * sizeof(output));
scanf("%d", &t); //number of sets
for (int i = 0; i < t; i++) {
scanf("%d %d", &n, &q);
//I am making 3 separate arrays for numbers, because some of them are being sorted and some of them not
Censor lowest_id[n];
Censor highest_id_index[n];
Censor xor[n];
//This loop fills arrays with the numbers to be processed
for (int j = 0; j < n; j++) {
scanf("%d", &(lowest_id[j].id));
lowest_id[j].index = j;
highest_id_index[j].id = lowest_id[j].id;
highest_id_index[j].index = j;
xor[j].id = lowest_id[j].id;
xor[j].index = j;
}
// Now I am scanning intervals and creating output. Output is being stored in one dynamically allocated array.
for (int k = 0; k < q; k++) {
scanf("%d %d", &b, &e);
if (count + 3 >= max) {
max *=2;
int *tmp = (int *)realloc(output, max * sizeof(tmp));
if (tmp == NULL) {
return 1;
} else {
output = tmp;
}
}
output[count++] = LowestId(b, e, lowest_id);
output[count++] = HighestIdIndex(b, e, highest_id_index);
output[count++] = Xor(b, e, xor);
}
}
printf("---------------------\n");
for (int i = 0; i < count; i++) {
printf("%d\n", output[i]);
}
free(output);
return 0;
}
Thanks #Dan Mašek and #Alex Lop. Sorting the subarray in this case was unnecessary. It is much easier to iterate through the subarray in linear time.

Ensemble averaging over ten independent realizations

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
//#include<igraph.h>
#define NUM_VERTICES1 15000// No. of data for Newman Watts to be used 15000:
//#define strings 10 // No. of base strings to be used 160:
//Function for generating infection rate randomly:
void unifRand(double *x, double *x1, double *x2)
{
int i;
const int n = 200; // 20
srand(unsigned(time(NULL)));
for(i = 0; i < n - 1; i++)
{
//x2[i] = rand()/double(RAND_MAX); //generate random number for choosing the infected neighbors(m):
x[i] = (0.2)+(0.4-0.2)*rand()/double(RAND_MAX);
x2[i] = 0.02; // fix the neighbor m and check:
x1[i] = log(1-x[i]);// Infection rate lambda:
printf("%lf\t%lf\t%lf\t%d\t%d\t\n", x[i], x1[i],x2[i],rand(), RAND_MAX);
}
}
// Function 2:
struct Edge {
int vertex;
struct Edge * next;
};
// Inserts Node to the Linked List by Head Insertion - O(1)
// Returns address of head which is the newly created node.
struct Edge * addEdge(struct Edge * currentHead, int newVertex)
{
struct Edge * newHead
= (struct Edge *) malloc(sizeof(struct Edge));
newHead->vertex = newVertex;
newHead->next = currentHead;
return newHead;
}
int main()
{
FILE *wg = NULL;
FILE *ob = NULL;
wg = fopen("ncwang1.txt","w");
ob = fopen("obs.txt","w");
if(wg == NULL)
{
printf("Error in opening file wg!\n");
}
if(ob == NULL)
{
printf("Error in opening file ob!\n");
}
int vertices = 200, edges = 400, i; // 20,50:(100,50)
int strings = 160;
int nobs = 10;
int v1, v2;
double j;
int k;
double t=0.0;
double dt=0.1;
double b;
double x[vertices], x1[vertices];
double x2[vertices];
unifRand(x,x1,x2);
// printf("Enter the Number of Vertices -\n");
// scanf("%d", &vertices);
//printf("\nEnter the Number of Edges -\n");
// scanf("%d", &edges);
struct Edge * adjacencyList[vertices + 1];
// Size is made (vertices + 1) to use the
// array as 1-indexed, for simplicity
// initialize array:
for (i = 0; i <= vertices; ++i) {
adjacencyList[i] = NULL;
}
for (i = 0; i <= edges; ++i) {
//scanf(%d%d", &v1, &v2);
v1 = rand()%200;
v2 = rand()%200;
// Adding edge v1 --> v2
// Add edge from v1 --> v2
if(v1 != v2)
adjacencyList[v1] = addEdge(adjacencyList[v1], v2);
// Adding edge v2 --> v1
// Remove this if you want a Directed Graph
adjacencyList[v2] = addEdge(adjacencyList[v2], v1);
}
// Printing Adjacency List
printf("\nAdjacency List -\n\n");
for(j = 0; j < strings; j++){
for (i = 0; i <= vertices; ++i) {
printf("adjacencyList[%d] -> ", i);
struct Edge * traverse = adjacencyList[i];
while (traverse != NULL)
{
b = (double)j/vertices;
fprintf(wg,"%d \t%d \t\t%0.6lf\t\t%0.1lf\t\t%0.8lf\t\n", i, traverse->vertex,-(x1[i]*(traverse->vertex))/100,b,
x[i]);
//fprintf(ob,"%d\t%0.2lf\t%0.1lf\n",k,(-log(1-x[i])*(traverse->vertex)),b);
printf("%d -> ", traverse->vertex);
traverse = traverse->next;
}
printf("NULL\n");
}
}
return 0;
fclose(wg);
fclose(ob);
wg = NULL;
ob = NULL;
}
I have written the above code to reproduce the network reconstruction performance from a research paper. I have to plot 'b' versus (-log(1-x[i])*(traverse->vertex)) from the output. The authors of the paper mention that "the results are obtained by ensemble averaging over 10 independent realizations". How can I implement this in my code? As I am new to statistical physics, I do not know how to implement it. Any suggestions will be helpful. The current output gives only a single line at b = 0.1, 0.2..1.0, which is not the expected output.

Sorting one array and copying the order over to another

I have two arrays side by side, one lists the different teams and the other lists the scores. I am able to sort the order of scores in descending order. Can this order then be used to move the corresponding team to the correct position of the leader board? eg. move the two teams with 100 points (USA and Germany) to the top of the board
#include <stdio.h>
int main()
{
char teams[18][20]={"England","Ireland","Wales","Scotland","France","Italy","Germany","Uraguay","Belgium","USA","Mexico","Australia","Belize","Denmark","Sweden","Japan","South Africa","Algeria"};
int points[18]={43,5,77,23,89,0,100,46,94,100,45,55,32,65,11,37,26,78};
int i;
int j;
int a;
for (i = 0; i < 18; ++i)
{
printf("%i ",i+1);
printf("%s",teams[i]);
printf("\t%d\n", points[i]);
}
printf("\n");
for (i = 0; i < 18; ++i)
{
for (j = i + 1; j < 18; ++j)
{
if (points[i] < points[j])
{
a = points[i];
points[i] = points[j];
points[j] = a;
}
}
}
for (i = 0; i < 18; ++i)
{
printf("%i ",i+1);
printf("%s",teams[i]);
printf("\t%d\n", points[i]);
}
return 0;
}
As mentioned in a comment, the typical solution is to model your data as an array of structures, rather than separate arrays. This makes sense, since the data is associated with each other.
You'd have something like:
struct score {
const char *name;
int points;
} scores[] = {
{ "England", 43 },
{ "Ireland", 5 },
/* and so on */
};
Then you can use qsort() (or your own sorting code, if that's of interest) to sort entire structure instances, and the all the data will remain together since entire structures are being moved around.
Also arrange your teams array when sorting;
a = points[i];
b = teams[i];
points[i] = points[j];
teams[i] = teams[j];
points[j] = a;
teams[j] = b;
The obvious way (as pointed out by others) is embedding your arrays into a struct, but if you are forced to use parallel arrays you can build your own function and sort both arrays at once:
#include <stdio.h>
static int comp(const void *a, const void *b)
{
return *(int *)a - *(int *)b;
}
static void swap(int v1[], char *v2[], int a, int b)
{
int temp1;
char *temp2;
temp1 = v1[a];
v1[a] = v1[b];
v1[b] = temp1;
temp2 = v2[a];
v2[a] = v2[b];
v2[b] = temp2;
}
static void sort(int v1[], char *v2[], int left, int right, int (*comp)(const void *, const void *))
{
int i, last;
if (left >= right) return;
swap(v1, v2, left, (left + right) / 2);
last = left;
for (i = left + 1; i <= right; i++) {
if (comp(&v1[i], &v1[left]) < 0)
swap(v1, v2, ++last, i);
}
swap(v1, v2, left, last);
sort(v1, v2, left, last - 1, comp);
sort(v1, v2, last + 1, right, comp);
}
int main(void)
{
char *teams[] = {"England","Ireland","Wales","Scotland","France","Italy","Germany","Uraguay","Belgium","USA","Mexico","Australia","Belize","Denmark","Sweden","Japan","South Africa","Algeria"};
int points[] = {43,5,77,23,89,0,100,46,94,100,45,55,32,65,11,37,26,78};
size_t i, n = sizeof(points) / sizeof(*points);
sort(points, teams, 0, n - 1, comp);
for (i = 0; i < n; i++) {
printf("%s->%d\n", teams[i], points[i]);
}
return 0;
}
Output:
Italy->0
Ireland->5
Sweden->11
Scotland->23
South Africa->26
Belize->32
Japan->37
England->43
Mexico->45
Uraguay->46
Australia->55
Denmark->65
Wales->77
Algeria->78
France->89
Belgium->94
Germany->100
USA->100

Shorter way to get 5 highest and 5 lowest values without changing the stack

So here is my code. I am trying to find a short way to make this program work without changing any of the arrangement. I have been taught the bubble sort, I think it's called, to arrange a group from highest to lowest, but my instructions clearly say not to change the entire group.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int randomInRange (unsigned int min, unsigned int max)
{
//srand(time(NULL));
int base_random = rand();
if (RAND_MAX == base_random) return randomInRange(min, max);
int range = max + 1 - min,
remainder = RAND_MAX % range,
bucket = RAND_MAX / range;
if (base_random < RAND_MAX - remainder) {
return min + base_random/bucket;
} else {
return randomInRange (min, max);
}
}
int main()
{
int ari,i,min,max;
printf("Gi'me length of the group")
scanf("%d",&ari);
int pinakas[ari];
printf("Gi'me lowest and highest values");
scanf("%d",&min);
scanf("%d",&max);
for(i = 0; i < ari; i++)
{
pinakas[ari] = randomInRange(min,max);
}
int el,meg,c;
el = max+1;
meg = min-1;
c = 0;
printf("Highest Lowest");
while( c != 4;)
{
for(i = 0; i < ari; i++)
{
if(el > pinakas[ari])
{
el = pinakas[ari];
}
if( meg < pinakas[ari])
{
meg = pinakas[ari];
}
if(i == 4)
{
printf("%d %d",el,meg);
( is there something that i can put here is order to make el,meg to go for the second lowest ,second highest? and so on till i get the 5 highest and 5 lowests.Keep in mind the the lowest length of my group will be pinakas[5].)
}
}
c++;
}
For each item in the array, up to 5 comparisons are done for the min list and 5 for the max list.
Suggest calling a function to do this in a tidy fashion.
#include<assert.h>
// `list` is `const` as OP says "withouth changing any of the arregment".
void sort_ends(const int *list, size_t listlen, int *minlist, int *maxlist,
size_t mlen) {
assert(list);
assert(minlist);
assert(maxlist);
assert(mlen >= 1);
assert(listlen >= mlen);
minlist[0] = list[0];
// For each element after the first ...
for (size_t i = 1; i < listlen; i++) {
int mincandidate = list[i];
size_t mini = i;
if (mini > mlen) mini = mlen;
do {
mini--;
if (mincandidate >= minlist[mini])
break;
// swap mincandidate and minlist[mini]
int t = mincandidate;
mincandidate = minlist[mini];
minlist[mini] = t;
} while (mini > 0);
}
// Do similar for maxlist, left for OP
}
int main() {
int ari;
// ...
int pinakas[ari];
// ...
int mlen = 5;
int minlist[mlen];
int maxlist[mlen];
sort_ends(pinakas, ari, minlist, maxlist, mlen);
return 0;
}
Alternative approach: find the min index and then memmove().

Prim's algorithm for MST, Adjacency List Implementation in C

I have this question for my programming class which I have been struggling to complete for the past day ... and I have no real idea what to do.
I understand the basic concept of Prim's algorithm:
1. Start at an arbitrary node (the first node will do) and
add all of its links onto a list.
2. Add the smallest link (which doesn't duplicate an existing path)
in the MST, to the Minimum Spanning Tree.
Remove this link from the list.
3. Add all of the links from the newly linked node onto the list
4. repeat steps 2 & 3 until MST is achieved
(there are no nodes left unconnected).
I have been given this implementation of a Graph (using an Adjacency List) to implement Prim's algorithm on. The problem is I don't really understand the implementation. My understanding of the implementation so far is as follows:
Being an adjacency list, we have all the nodes in array form: Linked to this is a list of links, containing details of the weight, the destination, and a pointer to the rest of the links of the specific node:
Something that looks a bit like this:
[0] -> [weight = 1][Destination = 3] -> [weight = 6][Destination = 4][NULL]
[1] -> [weight = 4][Destination = 3][NULL]
and so on...
We also have an "Edge" struct, which I think is supposed to make things simpler for the implementation, but I'm not really seeing it.
Here is the code given:
GRAPH.h interface:
typedef struct {
int v;
int w;
int weight;
} Edge;
Edge EDGE (int, int, int);
typedef struct graph *Graph;
Graph GRAPHinit (int);
void GRAPHinsertE (Graph, Edge);
void GRAPHremoveE (Graph, Edge);
int GRAPHedges (Edge [], Graph g);
Graph GRAPHcopy (Graph);
void GRAPHdestroy (Graph);
int GRAPHedgeScan (Edge *);
void GRAPHEdgePrint (Edge);
int GRAPHsearch (Graph, int[]);
Graph GRAPHmst (Graph);
Graph GRAPHmstPrim (Graph);
#define maxV 8
GRAPH.c implementation:
#include <stdlib.h>
#include <stdio.h>
#include "GRAPH.h"
#define exch(A, B) { Edge t = A; A = B; B = t; }
#define max(A,B)(A>B?A:B)
#define min(A,B)(A<B?A:B)
typedef struct node *link;
struct node {
int v;
int weight;
link next;
};
struct graph {
int V;
int E;
link *adj;
};
static void sortEdges (Edge *edges, int noOfEdges);
static void updateConnectedComponent (Graph g, int from, int to, int newVal, int *connectedComponent);
Edge EDGE (int v, int w, int weight) {
Edge e = {v, w, weight};
return e;
}
link NEW (int v, int weight, link next) {
link x = malloc (sizeof *x);
x->v = v;
x->next = next;
x->weight = weight;
return x;
}
Graph GRAPHinit (int V) {
int v;
Graph G = malloc (sizeof *G);
// Set the size of the graph, = number of verticies
G->V = V;
G->E = 0;
G->adj = malloc (V * sizeof(link));
for (v = 0; v < V; v++){
G->adj[v] = NULL;
}
return G;
}
void GRAPHdestroy (Graph g) {
// not implemented yet
}
void GRAPHinsertE(Graph G, Edge e){
int v = e.v;
int w = e.w;
int weight = e.weight;
G->adj[v] = NEW (w, weight, G->adj[v]);
G->adj[w] = NEW (v, weight, G->adj[w]);
G->E++;
}
void GRAPHremoveE(Graph G, Edge e){
int v = e.v;
int w = e.w;
link *curr;
curr = &G->adj[w];
while (*curr != NULL){
if ((*curr)->v == v) {
(*curr) = (*curr)->next;
G->E--;
break;
}
curr= &((*curr)->next);
}
curr = &G->adj[v];
while (*curr != NULL){
if ((*curr)->v == w) {
(*curr) = (*curr)->next;
break;
}
curr= &((*curr)->next);
}
}
int GRAPHedges (Edge edges[], Graph g) {
int v, E = 0;
link t;
for (v = 0; v < g->V; v++) {
for (t = g->adj[v]; t != NULL; t = t->next) {
if (v < t->v) {
edges[E++] = EDGE(v, t->v, t->weight);
}
}
}
return E;
}
void GRAPHEdgePrint (Edge edge) {
printf ("%d -- (%d) -- %d", edge.v, edge.weight, edge.w);
}
int GRAPHedgeScan (Edge *edge) {
if (edge == NULL) {
printf ("GRAPHedgeScan: called with NULL \n");
abort();
}
if ((scanf ("%d", &(edge->v)) == 1) &&
(scanf ("%d", &(edge->w)) == 1) &&
(scanf ("%d", &(edge->weight)) == 1)) {
return 1;
} else {
return 0;
}
}
// Update the CC label for all the nodes in the MST reachable through the edge from-to
// Assumes graph is a tree, will not terminate otherwise.
void updateConnectedComponent (Graph g, int from, int to, int newVal, int *connectedComponent) {
link currLink = g->adj[to];
connectedComponent[to] = newVal;
while (currLink != NULL) {
if (currLink->v != from) {
updateConnectedComponent (g, to, currLink->v, newVal, connectedComponent);
}
currLink = currLink->next;
}
}
// insertion sort, replace with O(n * lon n) alg to get
// optimal work complexity for Kruskal
void sortEdges (Edge *edges, int noOfEdges) {
int i;
int l = 0;
int r = noOfEdges-1;
for (i = r-1; i >= l; i--) {
int j = i;
while ((j < r) && (edges[j].weight > edges[j+1].weight)) {
exch (edges[j], edges[j+1]);
j++;
}
}
}
Graph GRAPHmst (Graph g) {
Edge *edgesSorted;
int i;
int *connectedComponent = malloc (sizeof (int) * g->V);
int *sizeOfCC = malloc (sizeof (int) * g->V);
Graph mst = GRAPHinit (g->V);
edgesSorted = malloc (sizeof (*edgesSorted) * g->E);
GRAPHedges (edgesSorted, g);
sortEdges (edgesSorted, g->E);
// keep track of the connected component each vertex belongs to
// in the current MST. Initially, MST is empty, so no vertex is
// in an MST CC, therefore all are set to -1.
// We also keep track of the size of each CC, so that we're able
// to identify the CC with fewer vertices when merging two CCs
for (i = 0; i < g->V; i++) {
connectedComponent[i] = -1;
sizeOfCC[i] = 0;
}
int currentEdge = 0; // the shortest edge not yet in the mst
int mstCnt = 0; // no of edges currently in the mst
int v, w;
// The MST can have at most min (g->E, g->V-1) edges
while ((currentEdge < g->E) && (mstCnt < g->V)) {
v = edgesSorted[currentEdge].v;
w = edgesSorted[currentEdge].w;
printf ("Looking at Edge ");
GRAPHEdgePrint (edgesSorted[currentEdge]);
if ((connectedComponent[v] == -1) ||
(connectedComponent[w] == -1)) {
GRAPHinsertE (mst, edgesSorted[currentEdge]);
mstCnt++;
if (connectedComponent[v] == connectedComponent[w]) {
connectedComponent[v] = mstCnt;
connectedComponent[w] = mstCnt;
sizeOfCC[mstCnt] = 2; // initialise a new CC
} else {
connectedComponent[v] = max (connectedComponent[w], connectedComponent[v]);
connectedComponent[w] = max (connectedComponent[w], connectedComponent[v]);
sizeOfCC[connectedComponent[w]]++;
}
printf (" is in MST\n");
} else if (connectedComponent[v] == connectedComponent[w]) {
printf (" is not in MST\n");
} else {
printf (" is in MST, connecting two msts\n");
GRAPHinsertE (mst, edgesSorted[currentEdge]);
mstCnt++;
// update the CC label of all the vertices in the smaller CC
// (size is only important for performance, not correctness)
if (sizeOfCC[connectedComponent[w]] > sizeOfCC[connectedComponent[v]]) {
updateConnectedComponent (mst, v, v, connectedComponent[w], connectedComponent);
sizeOfCC[connectedComponent[w]] += sizeOfCC[connectedComponent[v]];
} else {
updateConnectedComponent (mst, w, w, connectedComponent[v], connectedComponent);
sizeOfCC[connectedComponent[v]] += sizeOfCC[connectedComponent[w]];
}
}
currentEdge++;
}
free (edgesSorted);
free (connectedComponent);
free (sizeOfCC);
return mst;
}
// my code so far
Graph GRAPHmstPrim (Graph g) {
// Initializations
Graph mst = GRAPHinit (g->V); // graph to hold the MST
int i = 0;
int nodeIsConnected[g->V];
// initially all nodes are not connected, initialize as 0;
for(i = 0; i < g->V; i++) {
nodeIsConnected[i] = 0;
}
// extract the first vertex from the graph
nodeIsConnected[0] = 1;
// push all of the links from the first node onto a temporary list
link tempList = newList();
link vertex = g->adj[0];
while(vertex != NULL) {
tempList = prepend(tempList, vertex);
vertex = vertex->next;
}
// find the smallest link from the node;
mst->adj[0] =
}
// some helper functions I've been writing
static link newList(void) {
return NULL;
}
static link prepend(link list, link node) {
link temp = list;
list = malloc(sizeof(list));
list->v = node->v;
list->weigth = node->weight;
list->next = temp;
return list;
}
static link getSmallest(link list, int nodeIsConnected[]) {
link smallest = list;
while(list != NULL){
if((list->weight < smallest->weight)&&(nodeIsConnected[list->v] == 0)) {
smallest = list;
}
list = list->next;
}
if(nodeIsConnected[smallest->v] != 0) {
return NULL;
} else {
return smallest;
}
}
For clarity, file to obtain test data from file:
#include <stdlib.h>
#include <stdio.h>
#include "GRAPH.h"
// call with graph_e1.txt as input, for example.
//
int main (int argc, char *argv[]) {
Edge e, *edges;
Graph g, mst;
int graphSize, i, noOfEdges;
if (argc < 2) {
printf ("No size provided - setting max. no of vertices to %d\n", maxV);
graphSize = maxV;
} else {
graphSize = atoi (argv[1]);
}
g = GRAPHinit (graphSize);
printf ("Reading graph edges (format: v w weight) from stdin\n");
while (GRAPHedgeScan (&e)) {
GRAPHinsertE (g, e);
}
edges = malloc (sizeof (*edges) * graphSize * graphSize);
noOfEdges = GRAPHedges (edges, g);
printf ("Edges of the graph:\n");
for (i = 0; i < noOfEdges; i++) {
GRAPHEdgePrint (edges[i]);
printf ("\n");
}
mst = GRAPHmstPrim (g);
noOfEdges = GRAPHedges (edges, mst);
printf ("\n MST \n");
for (i = 0; i < noOfEdges; i++) {
GRAPHEdgePrint (edges[i]);
printf ("\n");
}
GRAPHdestroy (g);
GRAPHdestroy (mst);
free (edges);
return EXIT_SUCCESS;
}
Thanks in advance.
Luke
files in full: http://www.cse.unsw.edu.au/~cs1927/12s2/labs/13/MST.html
UPDATE: I have had another attempt at this question. Here is the updated code (One edit above to change the graph_client.c to use "GRAPHmstPrim" function that I have written.
GRAPH_adjlist.c::
#include <stdlib.h>
#include <stdio.h>
#include "GRAPH.h"
#define exch(A, B) { Edge t = A; A = B; B = t; }
#define max(A,B)(A>B?A:B)
#define min(A,B)(A<B?A:B)
typedef struct _node *link;
struct _node {
int v;
int weight;
link next;
}node;
struct graph {
int V;
int E;
link *adj;
};
typedef struct _edgeNode *edgeLink;
struct _edgeNode {
int v;
int w;
int weight;
edgeLink next;
}edgeNode;
static void sortEdges (Edge *edges, int noOfEdges);
static void updateConnectedComponent (Graph g, int from, int to, int newVal, int *connectedComponent);
Edge EDGE (int v, int w, int weight) {
Edge e = {v, w, weight};
return e;
}
link NEW (int v, int weight, link next) {
link x = malloc (sizeof *x);
x->v = v;
x->next = next;
x->weight = weight;
return x;
}
Graph GRAPHinit (int V) {
int v;
Graph G = malloc (sizeof *G);
G->V = V;
G->E = 0;
G->adj = malloc (V * sizeof(link));
for (v = 0; v < V; v++){
G->adj[v] = NULL;
}
return G;
}
void GRAPHdestroy (Graph g) {
// not implemented yet
}
void GRAPHinsertE(Graph G, Edge e){
int v = e.v;
int w = e.w;
int weight = e.weight;
G->adj[v] = NEW (w, weight, G->adj[v]);
G->adj[w] = NEW (v, weight, G->adj[w]);
G->E++;
}
void GRAPHremoveE(Graph G, Edge e){
int v = e.v;
int w = e.w;
link *curr;
curr = &G->adj[w];
while (*curr != NULL){
if ((*curr)->v == v) {
(*curr) = (*curr)->next;
G->E--;
break;
}
curr= &((*curr)->next);
}
curr = &G->adj[v];
while (*curr != NULL){
if ((*curr)->v == w) {
(*curr) = (*curr)->next;
break;
}
curr= &((*curr)->next);
}
}
// Copy the edges of g into edges[] and return how many were written.
// Every adjacency entry is visited, but an edge is emitted only from its
// lower-numbered endpoint, so each undirected edge appears once and
// entries whose neighbour equals the vertex itself are skipped.
// edges[] must be large enough for all emitted edges.
int GRAPHedges (Edge edges[], Graph g) {
    int count = 0;
    int from = 0;

    while (from < g->V) {
        link entry = g->adj[from];
        while (entry != NULL) {
            if (entry->v > from) {
                edges[count] = EDGE(from, entry->v, entry->weight);
                count++;
            }
            entry = entry->next;
        }
        from++;
    }
    return count;
}
// Print an edge to stdout as "v -- (weight) -- w", without a newline.
void GRAPHEdgePrint (Edge edge) {
    const int from = edge.v;
    const int cost = edge.weight;
    const int to = edge.w;

    printf ("%d -- (%d) -- %d", from, cost, to);
}
// Read one edge from stdin as three integers: v, w, weight.
// Returns 1 when all three were read, 0 as soon as one read fails
// (fields read before the failure have already been stored in *edge).
// Aborts if edge is NULL.
int GRAPHedgeScan (Edge *edge) {
    if (edge == NULL) {
        printf ("GRAPHedgeScan: called with NULL \n");
        abort();
    }
    if (scanf ("%d", &edge->v) != 1) {
        return 0;
    }
    if (scanf ("%d", &edge->w) != 1) {
        return 0;
    }
    if (scanf ("%d", &edge->weight) != 1) {
        return 0;
    }
    return 1;
}
// Relabel a subtree of the MST: store newVal as the connected-component
// label of vertex "to", then recurse into each neighbour of "to" other
// than "from" (the vertex we arrived from).
// The graph must be a tree; on a graph with a cycle the recursion never ends.
void updateConnectedComponent (Graph g, int from, int to, int newVal, int *connectedComponent) {
    link neighbour;

    connectedComponent[to] = newVal;
    for (neighbour = g->adj[to]; neighbour != NULL; neighbour = neighbour->next) {
        if (neighbour->v == from) {
            continue; // do not walk back along the edge we came in on
        }
        updateConnectedComponent (g, to, neighbour->v, newVal, connectedComponent);
    }
}
// Merge-sort edges[lo .. hi) by ascending weight, using scratch (same
// length as edges) as the merge buffer. On equal weights the element
// from the left half is taken first, which keeps the sort stable.
static void mergeSortEdges (Edge *edges, Edge *scratch, int lo, int hi) {
    int mid, left, right, out;

    if (hi - lo < 2) {
        return;
    }
    mid = lo + (hi - lo) / 2;
    mergeSortEdges (edges, scratch, lo, mid);
    mergeSortEdges (edges, scratch, mid, hi);

    left = lo;
    right = mid;
    out = lo;
    while ((left < mid) && (right < hi)) {
        if (edges[right].weight < edges[left].weight) {
            scratch[out++] = edges[right++];
        } else {
            scratch[out++] = edges[left++];
        }
    }
    while (left < mid) {
        scratch[out++] = edges[left++];
    }
    while (right < hi) {
        scratch[out++] = edges[right++];
    }
    for (out = lo; out < hi; out++) {
        edges[out] = scratch[out];
    }
}

// Sort the first noOfEdges entries of edges by ascending weight.
// Uses an O(n log n) merge sort so that Kruskal's algorithm gets its
// optimal work complexity. The sort is stable, so edges of equal weight
// keep their relative order and Kruskal picks the same tree as it would
// after a stable insertion sort (qsort gives no such guarantee).
// If the temporary buffer cannot be allocated, falls back to an
// in-place stable insertion sort.
void sortEdges (Edge *edges, int noOfEdges) {
    Edge *scratch;

    if (noOfEdges < 2) {
        return; // nothing to order
    }
    scratch = malloc (sizeof *scratch * (size_t) noOfEdges);
    if (scratch == NULL) {
        int i;
        int last = noOfEdges - 1;
        for (i = last - 1; i >= 0; i--) {
            int j = i;
            while ((j < last) && (edges[j].weight > edges[j+1].weight)) {
                exch (edges[j], edges[j+1]);
                j++;
            }
        }
        return;
    }
    mergeSortEdges (edges, scratch, 0, noOfEdges);
    free (scratch);
}
// Build a minimum spanning tree (forest, if g is disconnected) of g
// with Kruskal's algorithm and return it as a new graph owned by the
// caller. g itself is not modified. A trace line is printed to stdout
// for every edge examined.
Graph GRAPHmst (Graph g) {
    Edge *edgesSorted;
    int i;
    int *connectedComponent = malloc (sizeof (int) * g->V);
    int *sizeOfCC = malloc (sizeof (int) * g->V);
    Graph mst = GRAPHinit (g->V);
    edgesSorted = malloc (sizeof (*edgesSorted) * g->E);
    // Use the number of edges actually written: GRAPHedges skips
    // self-loops, so it can be smaller than g->E, and the remaining
    // slots of edgesSorted would be uninitialised.
    int noOfEdges = GRAPHedges (edgesSorted, g);
    sortEdges (edgesSorted, noOfEdges);
    // keep track of the connected component each vertex belongs to
    // in the current MST. Initially, MST is empty, so no vertex is
    // in an MST CC, therefore all are set to -1.
    // We also keep track of the size of each CC, so that we're able
    // to identify the CC with fewer vertices when merging two CCs
    for (i = 0; i < g->V; i++) {
        connectedComponent[i] = -1;
        sizeOfCC[i] = 0;
    }
    int currentEdge = 0; // the shortest edge not yet in the mst
    int mstCnt = 0; // no of edges currently in the mst
    int v, w;
    // The MST can have at most min (noOfEdges, g->V-1) edges. The loop
    // bound below is one looser than that, which only means the edges
    // left over after the tree is complete are still examined (and
    // reported as "not in MST").
    while ((currentEdge < noOfEdges) && (mstCnt < g->V)) {
        v = edgesSorted[currentEdge].v;
        w = edgesSorted[currentEdge].w;
        printf ("Looking at Edge ");
        GRAPHEdgePrint (edgesSorted[currentEdge]);
        if ((connectedComponent[v] == -1) ||
            (connectedComponent[w] == -1)) {
            // at least one endpoint is not in any CC yet
            GRAPHinsertE (mst, edgesSorted[currentEdge]);
            mstCnt++;
            if (connectedComponent[v] == connectedComponent[w]) {
                // both are -1: the edge starts a new CC labelled mstCnt
                connectedComponent[v] = mstCnt;
                connectedComponent[w] = mstCnt;
                sizeOfCC[mstCnt] = 2; // initialise a new CC
            } else {
                // exactly one is -1: max () picks the existing label
                connectedComponent[v] = max (connectedComponent[w], connectedComponent[v]);
                connectedComponent[w] = max (connectedComponent[w], connectedComponent[v]);
                sizeOfCC[connectedComponent[w]]++;
            }
            printf (" is in MST\n");
        } else if (connectedComponent[v] == connectedComponent[w]) {
            // both endpoints already connected: the edge would close a cycle
            printf (" is not in MST\n");
        } else {
            printf (" is in MST, connecting two msts\n");
            GRAPHinsertE (mst, edgesSorted[currentEdge]);
            mstCnt++;
            // Capture both labels before relabelling; afterwards the two
            // endpoints carry the same label and the old sizes could no
            // longer be told apart.
            int ccV = connectedComponent[v];
            int ccW = connectedComponent[w];
            // update the CC label of all the vertices in the smaller CC
            // (size is only important for performance, not correctness).
            // The new edge v--w is already in mst, so the walk starts at
            // the smaller side's endpoint with the other endpoint as
            // "from"; that keeps it from crossing into the larger CC.
            if (sizeOfCC[ccW] > sizeOfCC[ccV]) {
                updateConnectedComponent (mst, w, v, ccW, connectedComponent);
                sizeOfCC[ccW] += sizeOfCC[ccV];
            } else {
                updateConnectedComponent (mst, v, w, ccV, connectedComponent);
                sizeOfCC[ccV] += sizeOfCC[ccW];
            }
        }
        currentEdge++;
    }
    free (edgesSorted);
    free (connectedComponent);
    free (sizeOfCC);
    return mst;
}
// Create an empty candidate-edge list; the empty list is a NULL head.
edgeLink newEdgeList(void) {
    edgeLink emptyList = NULL;
    return emptyList;
}
// Push a candidate edge onto the front of list and return the new head.
//   list - current head of the candidate list (may be NULL)
//   node - vertex whose adjacency list the edge comes from (stored as w)
//   edge - adjacency entry supplying the other endpoint (v) and the weight
// Prints start/end trace markers to stdout. Aborts with a message on
// stderr if memory is exhausted, instead of dereferencing NULL.
edgeLink addEdgeList(edgeLink list, int node, link edge) {
    printf("EdgeListStart");
    edgeLink head = malloc(sizeof *head);
    if (head == NULL) {
        fprintf(stderr, "addEdgeList: out of memory\n");
        abort();
    }
    head->w = node;
    head->v = edge->v;
    head->weight = edge->weight;
    head->next = list;
    printf("EdgeListEnd");
    return head;
}
// Return the lightest edge in waitList whose endpoint v is not yet in
// the MST (nodeIsConnected[v] == 0), or NULL if there is none, including
// when waitList is empty. Among edges of equal weight the one nearest
// the head of the list wins.
// No weight sentinel is used, so edges of any weight can be selected.
// Trace output goes to stdout; "smallest already used" is reported only
// for edges that really lead to a vertex already in the MST.
edgeLink findSmallest(edgeLink waitList, int nodeIsConnected[]) {
    printf("SmallestSTart");
    edgeLink smallest = NULL;
    for (; waitList != NULL; waitList = waitList->next) {
        if (nodeIsConnected[waitList->v] != 0) {
            printf("\n\n smallest already used %d", waitList->v);
            continue;
        }
        if ((smallest == NULL) || (waitList->weight < smallest->weight)) {
            smallest = waitList;
        }
    }
    printf("SmallestEnd");
    if (smallest == NULL) {
        printf("Returning NULL");
    }
    return smallest;
}
// Push a new adjacency entry onto the front of list and return the new head.
//   smallest - candidate edge supplying the weight
//   list     - current head of the adjacency list (may be NULL)
//   v        - neighbouring vertex to store in the new entry
// Prints start/end trace markers to stdout. Aborts with a message on
// stderr if memory is exhausted, instead of dereferencing NULL.
link addList(edgeLink smallest, link list, int v) {
    printf(":istsatt");
    link head = malloc(sizeof *head);
    if (head == NULL) {
        fprintf(stderr, "addList: out of memory\n");
        abort();
    }
    head->v = v;
    head->weight = smallest->weight;
    head->next = list;
    printf("Listend");
    return head;
}
// Build a minimum spanning tree of the part of g reachable from vertex 0
// using Prim's algorithm, and return it as a new graph owned by the
// caller. g itself is not modified. Trace output goes to stdout.
//
// The candidate list persists across iterations: it accumulates the
// edges of every vertex added so far, so each step picks the lightest
// edge leaving the whole partial tree, not just the newest vertex.
Graph GRAPHmstPrim (Graph g) {
    Graph mst = GRAPHinit (g->V); // graph to hold the MST
    edgeLink waitList = newEdgeList(); // edges leaving the partial tree
    edgeLink smallest = NULL;
    int *nodeIsConnected; // nodeIsConnected[x] != 0 once x is in the MST
    int v = 0;
    int w = 0;

    if (g->V <= 0) {
        return mst; // no vertex to start from
    }
    // calloc zeroes the array: initially no node is in the MST
    nodeIsConnected = calloc ((size_t) g->V, sizeof *nodeIsConnected);
    if (nodeIsConnected == NULL) {
        fprintf (stderr, "GRAPHmstPrim: out of memory\n");
        abort ();
    }

    for (;;) {
        printf("v is : %d", v);
        // v (vertex 0 on the first pass) joins the MST
        nodeIsConnected[v] = 1;
        // push all of its links onto the list
        link vertex = g->adj[v];
        while (vertex != NULL) {
            waitList = addEdgeList(waitList, v, vertex);
            vertex = vertex->next;
        }
        // nothing to choose from: the start vertex has no edges at all
        if (waitList == NULL) {
            break;
        }
        // find the smallest edge from the list
        // which doesn't duplicate a connection
        smallest = findSmallest(waitList, nodeIsConnected);
        // every remaining edge leads to a vertex already in the MST:
        // the tree is complete
        if (smallest == NULL) {
            break;
        }
        // otherwise add the edge to the MST graph, in both directions
        w = smallest->w;
        v = smallest->v;
        mst->adj[v] = addList(smallest, mst->adj[v], w);
        mst->adj[w] = addList(smallest, mst->adj[w], v);
        mst->E++;
    }

    // release the candidate list and the bookkeeping array
    while (waitList != NULL) {
        edgeLink following = waitList->next;
        free (waitList);
        waitList = following;
    }
    free (nodeIsConnected);
    return mst;
}
Summary of changes:
- Added edgeList to hold the edges that may be entered into the MST
- Array nodeIsConnected[] to track whether a node is in the MST
- Function to select the smallest edge. If there is no edge that doesn't duplicate a link, this returns NULL.
Seeing as this seems to be homework, I'm not going to give the entire answer in code. Your code seems to be on the right track. The next step you need is indeed to add the smallest link from your temporary list to your mst. By adding the smallest one from your list, you are actually connecting your (partially built) mst to a node that is not yet in your mst. The link with the smallest weight will always be the cheapest way to connect the nodes in your mst to the other nodes.
When you add the smallest link, you are adding a node to the partially built tree and you need to update your temporary list. You need to add all the links of your new node to the list. Once you've done that, your temporary list contains all links of all nodes in your partially built mst. You continue that process of adding nodes until all nodes are in your mst.
When adding the cheapest link, you need to check if you are connecting a new node to your mst. The cheapest link could be connecting 2 nodes that are already in your mst. If so, that link needs to be skipped and you take the next cheapest one. There are actually several ways of handling this. You could maintain a set/vector of nodes that are already in your mst, maintain a vector of booleans to track the status of a node or make sure your temporary list only contains links that connect new nodes (although this is the most intensive approach).

Resources