OpenACC must have routine information error - c

I am trying to parallelize a simple mandelbrot c program, yet I get this error that has to do with not including acc routine information. Also, I am not sure whether I should be copying data in and out of the parallel section. PS I am relatively new to parallel programming, so any advice with learning it would be appreciated.
(Warning when compiled)
PGC-S-0155-Procedures called in a compute region must have acc routine information: fwrite (mandelbrot.c: 88)
PGC-S-0155-Accelerator region ignored; see -Minfo messages (mandelbrot.c: 51)
main:
51, Accelerator region ignored
88, Accelerator restriction: call to 'fwrite' with no acc routine information
PGC/x86-64 Linux 16.10-0: compilation completed with severe errors
Here is my code:
#include <stdio.h>
#include <math.h>
int main()
{
/* screen ( integer) coordinate */
int iX,iY;
const int iXmax = 800;
const int iYmax = 800;
/* world ( double) coordinate = parameter plane*/
double Cx,Cy;
const double CxMin=-2.5;
const double CxMax=1.5;
const double CyMin=-2.0;
const double CyMax=2.0;
/* */
double PixelWidth=(CxMax-CxMin)/iXmax;
double PixelHeight=(CyMax-CyMin)/iYmax;
/* color component ( R or G or B) is coded from 0 to 255 */
/* it is 24 bit color RGB file */
const int MaxColorComponentValue=255;
FILE * fp;
char *filename="new1.ppm";
char *comment="# ";/* comment should start with # */
static unsigned char color[3];
/* Z=Zx+Zy*i ; Z0 = 0 */
double Zx, Zy;
double Zx2, Zy2; /* Zx2=Zx*Zx; Zy2=Zy*Zy */
/* */
int Iteration;
const int IterationMax=200;
/* bail-out value , radius of circle ; */
const double EscapeRadius=2;
double ER2=EscapeRadius*EscapeRadius;
/*create new file,give it a name and open it in binary mode */
fp= fopen(filename,"wb"); /* b - binary mode */
/*write ASCII header to the file*/
fprintf(fp,"P6\n %s\n %d\n %d\n %d\n",comment,iXmax,iYmax,MaxColorComponentValue);
/* compute and write image data bytes to the file*/
#pragma acc parallel loop present(CyMin, iY, PixelHeight, iX, iXmax, CxMin, PixelWidth, Zx, Zy, Zx2, Zy2, Iteration, IterationMax)
for(iY=0;iY<iYmax;iY++)
{
Cy=CyMin + iY*PixelHeight;
if (fabs(Cy)< PixelHeight/2) Cy=0.0; /* Main antenna */
#pragma acc loop
for(iX=0;iX<iXmax;iX++)
{
Cx=CxMin + iX*PixelWidth;
/* initial value of orbit = critical point Z= 0 */
Zx=0.0;
Zy=0.0;
Zx2=Zx*Zx;
Zy2=Zy*Zy;
/* */
#pragma acc loop
for (Iteration=0;Iteration<IterationMax && ((Zx2+Zy2)<ER2);Iteration++)
{
Zy=2*Zx*Zy + Cy;
Zx=Zx2-Zy2 +Cx;
Zx2=Zx*Zx;
Zy2=Zy*Zy;
};
/* compute pixel color (24 bit = 3 bytes) */
if (Iteration==IterationMax)
{ /* interior of Mandelbrot set = black */
color[0]=0;
color[1]=0;
color[2]=0;
}
else
{ /* exterior of Mandelbrot set = white */
color[0]=255; /* Red*/
color[1]=255; /* Green */
color[2]=255;/* Blue */
};
/*write color to the file*/
fwrite(color,1,3,fp);
}
}
fclose(fp);
return 0;
}

Since you can't access a file from the GPU, you'll want to capture the results to arrays, copy them back to the host, and then output the results to a file.
Also, the "present" clause indicates that you've already copied the data over to the device and the program will abort if it's not there. Given the usage, I think you meant to use "private", which indicates that the variable should be private to the execution level. However scalars are private by default in OpenACC, so there's no need to manually privatize these variables. If you do manually privatize a variable, be sure to put it at the correct loop level. For example, if you privatize "Zx" on the outer loop, it will only private to that loop level. It would be shared by all the vectors of the inner loop! Again, here, it's best to just let the compiler handle privatizing the scalars, but just be mindful of where you privatize things in the few cases where you have to manually privatize variables.
Here's a corrected version of your code.
#include <stdio.h>
#include <math.h>
int main()
{
/* screen ( integer) coordinate */
int iX,iY;
const int iXmax = 800;
const int iYmax = 800;
/* world ( double) coordinate = parameter plane*/
double Cx,Cy;
const double CxMin=-2.5;
const double CxMax=1.5;
const double CyMin=-2.0;
const double CyMax=2.0;
/* */
double PixelWidth=(CxMax-CxMin)/iXmax;
double PixelHeight=(CyMax-CyMin)/iYmax;
/* color component ( R or G or B) is coded from 0 to 255 */
/* it is 24 bit color RGB file */
const int MaxColorComponentValue=255;
FILE * fp;
char *filename="new1.ppm";
char *comment="# ";/* comment should start with # */
static unsigned char color[3];
unsigned char red[iXmax][iYmax];
unsigned char blue[iXmax][iYmax];
unsigned char green[iXmax][iYmax];
/* Z=Zx+Zy*i ; Z0 = 0 */
double Zx, Zy;
double Zx2, Zy2; /* Zx2=Zx*Zx; Zy2=Zy*Zy */
/* */
int Iteration;
const int IterationMax=200;
/* bail-out value , radius of circle ; */
const double EscapeRadius=2;
double ER2=EscapeRadius*EscapeRadius;
/*create new file,give it a name and open it in binary mode */
fp= fopen(filename,"wb"); /* b - binary mode */
/*write ASCII header to the file*/
fprintf(fp,"P6\n %s\n %d\n %d\n %d\n",comment,iXmax,iYmax,MaxColorComponentValue);
/* compute and write image data bytes to the file*/
#pragma acc parallel loop copyout(red,blue,green)
for(iY=0;iY<iYmax;iY++)
{
Cy=CyMin + iY*PixelHeight;
if (fabs(Cy)< PixelHeight/2) Cy=0.0; /* Main antenna */
#pragma acc loop
for(iX=0;iX<iXmax;iX++)
{
Cx=CxMin + iX*PixelWidth;
/* initial value of orbit = critical point Z= 0 */
Zx=0.0;
Zy=0.0;
Zx2=Zx*Zx;
Zy2=Zy*Zy;
/* */
#pragma acc loop
for (Iteration=0;Iteration<IterationMax && ((Zx2+Zy2)<ER2);Iteration++)
{
Zy=2*Zx*Zy + Cy;
Zx=Zx2-Zy2 +Cx;
Zx2=Zx*Zx;
Zy2=Zy*Zy;
};
/* compute pixel color (24 bit = 3 bytes) */
if (Iteration==IterationMax)
{ /* interior of Mandelbrot set = black */
red[iX][iY]=0;
blue[iX][iY]=0;
green[iX][iY]=0;
}
else
{ /* exterior of Mandelbrot set = white */
red[iX][iY]=255;
blue[iX][iY]=255;
green[iX][iY]=255;
}; }
}
/*write color to the file*/
for(iY=0;iY<iYmax;iY++) {
for(iX=0;iX<iXmax;iX++) {
color[0] = red[iX][iY];
color[1] = blue[iX][iY];
color[2] = green[iX][iY];
fwrite(color,1,3,fp);
}
}
fclose(fp);
return 0;
}

When using OpenACC, the parallel regions are offloaded to a device, like a GPU. GPU devices normally don't have access to the entire system or the IO, and have a reduced subset of the standard library implemented.
In your case, the fwrite function call cannot be offloaded to a device, since you cannot access the disk from the accelerator.
You could do that in OpenMP, where parallel regions are executed on CPU or MIC threads, which typically have access to the entire system library.
The acc routine directive that PGC is suggesting would allow you to annotate any function to create a device version of it.

Related

How to implement X11(return colour of a screen pixel) C code for luajit's ffi?

I want to record small section of my screen with luajit.
Haven't found any module for that. And there are barely any documentations/tutorials/examples about luajit's ffi on the web aside from http://luajit.org/ext_ffi.html which doesn't provide any examples of using other C libraries.
I have a C code snippet that works native. How would you implement the C code for luajit's ffi?
Luajit example code:
--ffi part
local screen = {}
for y = 1, 100 do
for x = 1, 100 do
local r, g, b = ffi.C.getpixel(x, y)
table.insert(screen, r)
end
end
C code snippet:
#include <stdio.h>
#include <X11/Xlib.h>
#include <X11/Xutil.h>
int main()
{
XColor c;
Display *d = XOpenDisplay((char *) NULL);
int x=1920/2; // Pixel x
int y=1080/2; // Pixel y
XImage *image;
image = XGetImage (d, XRootWindow (d, XDefaultScreen (d)), x, y, 1, 1, AllPlanes, XYPixmap);
c.pixel = XGetPixel (image, 0, 0);
XFree (image);
XQueryColor (d, XDefaultColormap(d, XDefaultScreen (d)), &c);
printf("%d %d %d\n", c.red/256, c.green/256, c.blue/256);
return 0;
}
Basically you only have to copy all the declarations from the headers into the ffi.cdef section and then call these names through a handle to the library. In principle you can translate the C code one to one, with the exception of taking the address of a variable. However, this is documented in the FFI tutorial you linked (http://luajit.org/ext_ffi_tutorial.html#idioms)
C code Lua code
Functions with outargs int len = x; local len = ffi.new("int[1]", x)
void foo(int *inoutlen); foo(&len); foo(len)
y = len; y = len[0]
Here is your C code in LuaJIT. I didn't copy the definitions for Display and XImage because we never access their members and only use pointers to them, so they remain opaque structs.
local ffi = assert(require("ffi"))
ffi.cdef[[
// Types from various headers
typedef struct _Display Display;
typedef struct _XImage XImage;
typedef struct {
unsigned long pixel;
unsigned short red, green, blue;
char flags; /* do_red, do_green, do_blue */
char pad;
} XColor; // Xlib.h
typedef unsigned long XID; // Xdefs.h
typedef XID Window; // X.h
typedef XID Drawable; // X.h
typedef XID Colormap; // X.h
// Functions from Xlib.h
Display *XOpenDisplay(
char* /* display_name */
);
int XDefaultScreen(
Display* /* display */
);
Window XRootWindow(
Display* /* display */,
int /* screen_number */
);
XImage *XGetImage(
Display* /* display */,
Drawable /* d */,
int /* x */,
int /* y */,
unsigned int /* width */,
unsigned int /* height */,
unsigned long /* plane_mask */,
int /* format */
);
int XFree(
void* /* data */
);
int XQueryColor(
Display* /* display */,
Colormap /* colormap */,
XColor* /* def_in_out */
);
Colormap XDefaultColormap(
Display* /* display */,
int /* screen_number */
);
// Functions from Xutil.h
unsigned long XGetPixel(
XImage *ximage,
int x, int y);
]]
local X11 = assert(ffi.load("X11"))
local AllPlanes = -1 -- Xlib.h: #define AllPlanes ((unsigned long)~0L)
local XYPixmap = 1 -- X.h: #define XYPixmap 1
local c = ffi.new("XColor[1]")
local d = X11.XOpenDisplay(ffi.NULL)
local x = 1920 / 2
local y = 1080 / 2
local image = X11.XGetImage(d, X11.XRootWindow(d, X11.XDefaultScreen(d)), x, y, 1, 1, AllPlanes, XYPixmap)
c[0].pixel = X11.XGetPixel(image, 0, 0)
X11.XFree(image)
X11.XQueryColor(d, X11.XDefaultColormap(d, X11.XDefaultScreen(d)), c)
print(string.format("%d %d %d", c[0].red/256, c[0].green/256, c[0].blue/256))

C - Shuffling an Array of Structs

I'm doing a project for school in C and basically what I need to do is to create a grid of agents (Humans, Zombies or none) and randomly pick which ones I manually control or are "AI" controlled. Basically humans need to run from zombies and zombies have to chase humans to infect them, game ending when there are no humans left.
Problem is, before each turn it should be randomly selected who plays first, and for that I need to shuffle the agents (not touching the grid because the agent positions remain the same, the only thing that changes is their position in the array that I should be using to pick who plays first).
I'm having some troubles with the shuffle because I call the function and after I shuffle the agents I print their Ids. It just give me many many 0s and in between some random numbers like 3, 6, 10, etc and a few more only.
Here's the code:
main file:
#include "showworld.h"
#include "example.h"
#include "shuffle.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/** Horizontal world size. */
#define WORLD_X 20
/** Vertical world size. */
#define WORLD_Y 20
/**
* Structure defining agent properties.
*
* #note This is an example which will probably not work in a fully functional
* game. Students should develop their own implementation of
* ::get_agent_info_at() and agent/world data structures.
* */
typedef struct {
AGENT_TYPE type; /**< Agent type. */
unsigned char playable; /**< Is agent playable? */
unsigned short id; /**< Agent ID. */
} AGENT;
/**
* Structure defining world properties.
*
* #note This is an example which will probably not work in a fully functional
* game. Students should develop their own implementation of
* ::get_agent_info_at() and agent/world data structures.
* */
typedef struct {
AGENT *grid; /**< World is a grid composed of agents. */
unsigned int xsize; /**< Horizontal world size. */
unsigned int ysize; /**< Vertical world size. */
} WORLD;
/* This function is an implementation of the definition provided by the
* ::get_agent_info_at() function pointer. It only works for AGENT and WORLD
* example structures defined in this file. */
unsigned int example_get_ag_info(void *world, unsigned int x, unsigned int y);
int main() {
/* An instance of a WORLD structure. */
WORLD my_world;
/* An instance of a SHOWWORLD world display. */
SHOWWORLD *sw = NULL;
/* A by-dimensional array of agents, representing agents in a grid. */
AGENT agent_grid[WORLD_X][WORLD_Y];
/* Number of agents created so far. */
unsigned int nagents = 0;
/* Initialize world display. */
sw = showworld_new(WORLD_X, WORLD_Y, example_get_ag_info);
/* Initialize random number generator. */
srand(time(NULL));
/* **************************************************************** */
/* Cycle through all cells in grid and randomly place agents in it. */
/* **************************************************************** */
for (int i = 0; i < WORLD_X; ++i) {
for (int j = 0; j < WORLD_Y; ++j) {
/* Possible agent in grid. By default we assume there is none. */
AGENT ag = {None, 0, 0};
/* Obtain a probability between 0 and 99. */
unsigned char probability = rand() % 100;
/* There is 10% probability of creating an agent. */
if (probability < 10) {
/* If we got here, an agent will be placed at (i,j). */
/* Randomly define agent type. */
ag.type = (rand() % 2 == 0) ? Human : Zombie;
/* Give 10% probablity of agent being playable by user. */
ag.playable = (rand() % 10 == 0);
/* Assign agent ID and then increment number of agents so
far. */
ag.id = nagents++;
}
/* Assign possible agent to grid at (i,j). */
agent_grid[i][j] = ag;
}
}
/* ******************************* */
/* Populate the my_world variable. */
/* ******************************* */
/* A bidimensional array of agents can be interpreted as a pointer to
agents. */
my_world.grid = (AGENT *) agent_grid;
/* World size is defined by constants in this example. */
my_world.xsize = WORLD_X;
my_world.ysize = WORLD_Y;
/* ********************************************************************* */
/* Show world using the simple_show_world() function. This function can */
/* be used in the first part of the project. */
/* ********************************************************************* */
showworld_update(sw, &my_world);
shuffle(my_world.grid, nagents);
/* Before finishing, ask user to press ENTER. */
printf("Press ENTER to continue...");
getchar();
/* Destroy world display. */
showworld_destroy(sw);
/* Bye. */
return 0;
}
/**
* This function is an implementation of the ::get_agent_info_at() function
* definition. It only works for ::AGENT and ::WORLD structures defined in this
* example.
*
* It basically receives a pointer to a ::WORLD structure, obtains the AGENT
* structure in the given coordinates, and returns the agent information in a
* bit-packed `unsigned int`.
*
* #note This is an example which will probably not work in a fully functional
* game. Students should develop their own implementation of
* ::get_agent_info_at() and agent/world data structures.
*
* #param w Generic pointer to object representing the simulation world.
* #param x Horizontal coordinate of the simulation world from where to fetch
* the agent information.
* #param y Vertical coordinate of the simulation world from where to fetch
* the agent information.
* #return An integer containing bit-packed information about an agent, as
* follows: bits 0-1 (agent type), bit 2 (is agent playable), bits 3-18 (agent
* ID). Bits 19-31 are available for student-defined agent extensions.
* */
unsigned int example_get_ag_info(void *w, unsigned int x, unsigned int y) {
/* The agent information to return. */
unsigned int ag_info = 0;
/* Convert generic pointer to world to a WORLD object. */
WORLD *my_world = (WORLD *) w;
/* Check if the given (x,y) coordinates are within bounds of the world. */
if ((x >= my_world->xsize) || (y >= my_world->ysize)) {
/* If we got here, then the coordinates are off bounds. As such we will
report that the requested agent is of unknown type. No need to
specify agent ID or playable status, since the agent is unknown. */
ag_info = Unknown;
} else {
/* Given coordinates are within bounds, let's get and pack the request
agent information. */
/* Obtain agent at specified coordinates. */
AGENT ag = my_world->grid[x * my_world->xsize + y];
/* Is there an agent at (x,y)? */
if (ag.type == None) {
/* If there is no agent at the (x,y) coordinates, set agent type to
None. No need to specify agent ID or playable status, since
there is no agent here. */
ag_info = None;
} else {
/* If we get here it's because there is an agent at (x,y). Bit-pack
all the agent information as specified by the get_agent_info_at
function pointer definition. */
ag_info = (ag.id << 3) | (ag.playable << 2) | ag.type;
}
}
/* Return the requested agent information. */
return ag_info;
}
Here's shuffle function
#include "example.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
void shuffle(AGENT *agents, unsigned int nagents) {
printf("%s\n\n", "------------- Shuffling agents ----------------");
unsigned int i=0;
unsigned int j=0;
AGENT temp;
srand(time(NULL));
for (i = nagents - 1; i > 0; i--) {
j = (rand() % i);
temp = agents[i];
agents[i] = agents[j];
agents[j] = temp;
}
for (i = 0; i < nagents; i++) {
printf("\n\t%d", agents[i].id);
}
Here's showworld file:
#include "showworld.h"
#include <stdio.h>
#include <stdlib.h>
/* The implementation of `SHOWWORLD` type used in this simple text-based world
* visualization code. In this simple case, we only need to keep track of the
* world dimensions and of the function pointer which knows how to read an
* agent from the world data structure.
*
* For a more complex implementation, for example based on the g2 library,
* it would also be necessary to keep the g2 device.
* */
struct showworld {
unsigned int xdim;
unsigned int ydim;
get_agent_info_at aginfo_func;
};
/* Create a new display/visualization object for the simulation world.
*
* This function obeys the `showworld_new()` prototype defined in
* `showworld.h`. */
SHOWWORLD *showworld_new(
unsigned int xdim,
unsigned int ydim,
get_agent_info_at aginfo_func) {
SHOWWORLD *sw = NULL;
sw = malloc(sizeof(SHOWWORLD));
sw->xdim = xdim;
sw->ydim = ydim;
sw->aginfo_func = aginfo_func;
return sw;
}
/* Destroy a display/visualization object for the simulation world.
*
* This function obeys the `showworld_destroy()` prototype defined in
* `showworld.h`. */
void showworld_destroy(SHOWWORLD *sw) {
free(sw);
}
/* Update the simulation world display/visualization.
*
* This function obeys the `showworld_update()` prototype defined in
* `showworld.h`. */
void showworld_update(SHOWWORLD *sw, void *w) {
printf("\n");
/* Cycle through all the rows */
for (unsigned int y = 0; y < sw->ydim; ++y) {
/* Cycle through all the columns for the current row */
for (unsigned int x = 0; x < sw->xdim; ++x) {
/* Get state of the world (in bit packed fashion) using the user
supplied function. */
unsigned int item = sw->aginfo_func(w, x, y);
/* Extract the agent type (2 bits). */
AGENT_TYPE ag_type = item & 0x3;
/* Extract whether the agent is playable (1 bit). */
unsigned char playable = (item >> 2) & 0x1;
/* Extract the agent ID (16 bits). */
unsigned short ag_id = (item >> 3) & 0xFFFF;
/* Determine the agent type. */
switch (ag_type) {
/* If no agent is present at (x,y) just print a dot. */
case None:
printf(" . ");
break;
/* If human agent present at (x,y) print 'h' or 'H'. */
case Human:
if (playable) {
/* Uppercase 'H' for player-controlled human agent. */
printf("H");
} else {
/* Lowercase 'h' for AI-controlled human agent. */
printf("h");
}
/* Print the agent ID in front of the 'h'/'H'. */
printf("%02X ", ag_id);
break;
/* If zombie agent present at (x,y) print 'z' or 'Z'. */
case Zombie:
if (playable) {
/* Uppercase 'Z' for player-controlled zombie agent. */
printf("Z");
} else {
/* Lowercase 'z' for AI-controlled zombie agent. */
printf("z");
}
/* Print the agent ID in front of the 'h'/'H'. */
printf("%02X ", ag_id);
break;
/* Print '?' if unknown type detected. This should *never*
happen. */
default:
printf("? ");
}
}
/* Print two newlines after each row. */
printf("\n\n");
}
/* Print a newline after world is shown/updated. */
printf("\n");
}
Stripping all that code to the relevant:
AGENT agent_grid[WORLD_X][WORLD_Y];
int nagents = populate_grid(agent_grid, WORLD_X, WORLD_Y, 10);
shuffle(agent_grid, nagents);
Here, I've removed my_world to focus on the grid, and created a grid initialization function instead of doing the initialization inline. The hypothetical initialization function int populate_grid(AGENT *grid, int rows, int cols, int percent) fills a sample of the grid with agents (and the rest with AGENT objects of type ``NONE`). It then returns the sample size created.
Then, the grid is shuffled using precisely your call to shuffle, which takes an array of AGENT objects and shuffles them.
Isn't the problem obvious from that narrative? agent_grid is not an array of size nagents as expected by shuffle. It is a two-dimensional array of size WORLD_X * WORLD_Y. In practice, that means that you are shuffling the first 40 (or so) grid slots, leaving the other 360 untouched; we can expect that 90% of those grid slots are empty, which seems to match your described outcome ("many, many 0s").
That is a lot of code, but the first thing I would look for is bounds errors. Make sure your array indices are consistent and do not access items outside of the valid range in your arrays.
The next problem is your shuffle. Jeff Atwood wrote an entire blog about it.
Good luck!

For loop being skipped in c-file when run from bash

I am running a c file which generates data and writes it to text file. Below is a bash script which runs this file multiple times for different parameters. When I run the c code by itself there are no problems. When I run it with the script below no data is saved to the text file (they are still created). It seems to be that the for loop, which the printing to the text file lies in, is being skipped (ascertained by placing print statements before and after this for loop).
#!/bin/bash
make studentt_sampler
# arguments to pass to ./studentt_sampler
no_samples=5
nu=3.0
mu=1.0
sigma=1.0
data_files=("data_file_0p01.txt" "data_file_0p1.txt" "data_file_1.txt" "data_file_10.txt")
proposal_sigma=(0.01,0.1,1.0,10.0)
# arguments to pass to post_process.py
figure_files=("chain_0p01.pdf" "chain_0p1.pdf" "chain_1.pdf" "chain_10.pdf")
for i in `seq 0 3`;
do
#echo ${data_files[$i]}
./studentt_sampler no_samples nu mu sigma "${data_files[$i]}" "${proposal_sigma[$i]}" &
# ./post_process.py echo ${data_files[$i]} echo ${figure_files[$i]}
done
wait
The main function of the c file is
int main(int argc, char *argv[]) {
/* Initialization */
const gsl_rng_type * T;
gsl_rng * r;
/* set iteration variables and the order of the student-t distribution
* from the command line arguments */
int i, itr = atoi(argv[1]);
/* parameters of student t distributions */
double nu = atof(argv[2]);
double mu = atof(argv[3]);
double sigma = atof(argv[4]);
/* store the parameters in param_type struct */
struct param_type params = {nu,mu,sigma};
/* open text file for writing and make sure it works*/
printf("%s\n",argv[5]);
FILE *f = fopen(argv[5], "w");
if (f == NULL) {
printf("Error opening file!\n");
exit(1);
}
/* allocate memory for generator and set its seed */
r = gsl_rng_alloc(gsl_rng_mt19937);
gsl_rng_set(r,1);
/* Start initial value */
double x_cur = 1.0;
double proposal_sigma = atof(argv[6]);
double alpha;
double x_prop;
int accept; /* keep track of acceptance rate */
double u; /* to make decision of accept proposal or not */
double accept_prob;
/* Start the MCMC */
for (i=0;i<itr;i++) {
printf("here!!!\n");
/* propose a new x */
x_prop = gsl_ran_gaussian(r,proposal_sigma) + x_cur;
/* Calculate acceptance probability */
accept_prob = lklhood(x_prop, &params)/lklhood(x_cur, &params);
alpha = GSL_MIN(1.0,accept_prob);
/* Accept or not, decide */
u = gsl_ran_flat(r,0.0,1.0);
if (u < alpha) {
x_cur = x_prop;
accept = 1;
}/* print to data file */
else {
accept = 0;
}
fprintf(f," %.5f %i\n",x_cur,accept);
}
/* Clean up time */
fclose(f);
return 0;
}
I appreciate your help.
Is it possible that you forgot the $ in the variables you supply to the program, i.e. shouldn't it be:
./studentt_sampler $no_samples $nu $mu $sigma "${data_files[$i]}" "${proposal_sigma[$i]}" &

Is it possible to have Variable-Size Signals in C MEX S-Function?

I know that it is possible to write Level-2 MATLAB S-Functionswith variable-sized signals.
Is it also somehow possible to do that in C MEX S-Functions?
My data has a different size at each time step. This requirement originates from a compressing block which gets a fixed size signal (2D) as its input. However the output signal (1D / Vector) changes its size at every mdlOutput().
The comments of the question already answered it:
Yes it is possible!
Here my example:
// Required S-Function header
#define S_FUNCTION_LEVEL 2
#define S_FUNCTION_NAME sfunc_varsignal
#include "simstruc.h"
enum {INPUT_PORT = 0, NUM_INPUT_PORTS};
enum {OUTPUT_PORT = 0, NUM_OUPUT_PORTS};
/**
* "Specify the number of inputs, outputs, states, parameters, and other
* characteristics of the C MEX S-function"
*/
static void mdlInitializeSizes(SimStruct* S)
{
boolean_T boolret;
int_T intret;
// Parameter
ssSetNumSFcnParams(S, 0);
if (ssGetNumSFcnParams(S) != ssGetSFcnParamsCount(S))
{
return; // Parameter mismatch will be reported by Simulink
}
// Input port
boolret = ssSetNumInputPorts(S, NUM_INPUT_PORTS);
if (boolret == 0)
{
return;
}
ssSetInputPortDirectFeedThrough(S, INPUT_PORT, 1);
intret = ssSetInputPortDimensionInfo(S, INPUT_PORT, DYNAMIC_DIMENSION);
if (intret == 0)
{
ssWarning(S, "Input dimensions could not be set.");
}
_ssSetInputPortNumDimensions(S, INPUT_PORT, 1);
// Output port
boolret = ssSetNumOutputPorts(S, NUM_OUPUT_PORTS);
if (boolret == 0)
{
return;
}
intret = ssSetOutputPortDimensionInfo(S, OUTPUT_PORT, DYNAMIC_DIMENSION);
if (intret == 0)
{
ssWarning(S, "Output dimensions could not be set.");
}
_ssSetOutputPortNumDimensions(S, OUTPUT_PORT, 1);
// Sample Times
ssSetNumSampleTimes(S, 1);
// Dimension Modes of the Ports
ssSetInputPortDimensionsMode(S, INPUT_PORT, INHERIT_DIMS_MODE);
ssSetOutputPortDimensionsMode(S, OUTPUT_PORT, INHERIT_DIMS_MODE);
// This is required for any kind of variable size signal:
ssSetInputPortRequiredContiguous(S, INPUT_PORT, true);
ssSetOptions(S, 0);
// Note: In the doc of ssSetOutputPortWidth it is wriiten:
// "If the width is dynamically sized, the S-function must provide
// mdlSetOutputPortDimensionInfo and mdlSetDefaultPortDimensionInfo
// methods to enable the signal dimensions to be set correctly
// during signal propagation."
// However in the example sfun_varsize_concat1D this methods are
// not present. The function _ssSetOutputPortNumDimensions() may be sufficient
// This usgae of this function is copied from the example sfcndemo_varsize
}
#if defined(MATLAB_MEX_FILE)
/**
* "Set the width of an input port that accepts 1-D (vector) signals"
*/
#define MDL_SET_INPUT_PORT_WIDTH
static void mdlSetInputPortWidth(SimStruct* S, int_T port, int_T width)
{
// Set to the sugessted width (e.g. the output width
// from the connected block)
ssSetInputPortWidth(S, port, width);
// Check if the setting was sucessful
if (ssGetInputPortWidth(S, INPUT_PORT) != DYNAMICALLY_SIZED)
{
ssSetOutputPortWidth(S, OUTPUT_PORT, width);
}
return;
}
/**
* "Set the width of an output port that outputs 1-D (vector) signals"
*/
#define MDL_SET_OUTPUT_PORT_WIDTH
static void mdlSetOutputPortWidth(SimStruct* S, int_T port, int_T width)
{
// Nothing here, but its required since the output port is set as
// dynamically sized. But its size is set in mdlSetInputPortWidth()
UNUSED_ARG(S);
UNUSED_ARG(port);
UNUSED_ARG(width);
return;
}
#endif //defined(MATLAB_MEX_FILE)
/**
* "Specify the sample rates at which this C MEX S-function operates"
*/
static void mdlInitializeSampleTimes(SimStruct* S)
{
ssSetSampleTime(S, 0, INHERITED_SAMPLE_TIME);
ssSetOffsetTime(S, 0, 0.0);
ssSetModelReferenceSampleTimeDefaultInheritance(S);
}
/**
* "Compute the signals that this block emits."
*/
static void mdlOutputs(SimStruct* S, int_T tid)
{
UNUSED_ARG(tid);
const real_T* insignal = ssGetInputPortRealSignal(S, INPUT_PORT);
auto width = static_cast<const int>(insignal[0]);
// This function does the trick:
ssSetCurrentOutputPortDimensions(S, OUTPUT_PORT, 0, width);
// newWidth should be width
int_T newWidth = ssGetCurrentOutputPortDimensions(S, OUTPUT_PORT, 0 /* dimension ID */);
real_T* outsignal = ssGetOutputPortRealSignal(S, OUTPUT_PORT);
for (int i = 0; i < newWidth; i++)
{
*outsignal++ = i+1;
}
}
/**
* "Perform any actions required at termination of the simulation"
*/
static void mdlTerminate(SimStruct* S)
{
UNUSED_ARG(S);
}
// Required S-function trailer
#ifdef MATLAB_MEX_FILE /* Is this file being compiled as a MEX-file? */
#include "simulink.c" /* MEX-file interface mechanism */
#else
#include "cg_sfun.h" /* Code generation registration function */
#endif

a working non-recursive floodfill algorithm written in C?

I've been trying to find a working floodfill algorithm. Of the many algorithms I've tried only the 'recursive line fill' one behaves exactly as it should with the major caveat that it occasionally blows the stack. :(
I have tried many non-recursive implementations I've found and they have all been exceptionally tempermental: either they leave gaps in strange places, or flood the whole area (when they should be enclosed).
Anyone has a NON-recursive floodfill working sourcecode written in C (or c++ that isn't too heavily OOP and I can disentangle easily enough)?
Just implement a stack of int pairs with an array of some fixed size (maybe the size of the image in pixels or the square root of that, for example) for the stack and track the top with an int.
Here is some C# code that implements floodfill non-recursively:
private static void Floodfill(byte[,] vals, Point q, byte SEED_COLOR, byte COLOR)
{
int h = vals.GetLength(0);
int w = vals.GetLength(1);
if (q.Y < 0 || q.Y > h - 1 || q.X < 0 || q.X > w - 1)
return;
Stack<Point> stack = new Stack<Point>();
stack.Push(q);
while (stack.Count > 0)
{
Point p = stack.Pop();
int x = p.X;
int y = p.Y;
if (y < 0 || y > h - 1 || x < 0 || x > w - 1)
continue;
byte val = vals[y, x];
if (val == SEED_COLOR)
{
vals[y, x] = COLOR;
stack.Push(new Point(x + 1, y));
stack.Push(new Point(x - 1, y));
stack.Push(new Point(x, y + 1));
stack.Push(new Point(x, y - 1));
}
}
}
Here's some C++ code that does what you want. It uses a queue, and is more efficient about insertions into the queue.
connectedRegion(const Point& source, RegionType& region, const Color target)
{
Color src_color = color_of(source, region);
if (region.count(source) == 0 || src_color == target)
return;
std::queue<Point> analyze_queue;
analyze_queue.push(source);
while (!analyze_queue.empty())
{
if (color_of(analyze_queue.front()) != src_color)
{
analyze_queue.pop();
continue;
}
Point leftmost_pt = analyze_queue.front();
leftmost_pt.col -= 1;
analyze_queue.pop();
Point rightmost_pt = leftmost_pt;
rightmost_pt.col += 2;
while (color_of(leftmost_pt, region) == src_color)
--leftmost_pt.col;
while (color_of(rightmost_pt, region) == src_color)
++rightmost_pt.col;
bool check_above = true;
bool check_below = true;
Point pt = leftmost_pt;
++pt.col;
for (; pt.col < rightmost_pt.col; ++pt.col)
{
set_color(pt, region, target);
Point pt_above = pt;
--pt_above.row;
if (check_above)
{
if (color_of(pt_above, region) == src_color)
{
analyze_queue.push(pt_above);
check_above = false;
}
}
else // !check_above
{
check_above = (color_of(pt_above, region) != src_color);
}
Point pt_below = pt;
++pt_below.row;
if (check_below)
{
if (color_of(pt_below, region) == src_color)
{
analyze_queue.push(pt_below);
check_below = false;
}
}
else // !check_below
{
check_below = (color_of(pt_below, region) != src_color);
}
} // for
} // while queue not empty
return connected;
}
A quick googling brings up the Wikipedia article on Flood Fill which includes pseudocode implementations which are not recursive. Below is some code that could help get you started, a basic queue implementation in C:
typedef struct queue_ { struct queue_ *next; } queue_t;
typedef struct ffnode_ { queue_t node; int x, y; } ffnode_t;
/* returns the new head of the queue after adding node to the queue */
queue_t* enqueue(queue_t *queue, queue_t *node) {
if (node) {
node->next = queue;
return node;
}
return NULL;
}
/* returns the head of the queue and modifies queue to be the new head */
queue_t* dequeue(queue_t **queue) {
if (queue) {
queue_t *node = (*queue);
(*queue) = node->next;
node->next = NULL;
return node;
}
return NULL;
}
ffnode_t* new_ffnode(int x, int y) {
ffnode_t *node = (ffnode_t*)malloc(sizeof(ffnode_t));
node->x = x; node->y = y;
node->node.next = NULL;
return node;
}
void flood_fill(image_t *image, int startx, int starty,
color_t target, color_t replacement) {
queue_t *head = NULL;
ffnode_t *node = NULL;
if (!is_color(image, startx, starty, target)) return;
node = new_ffnode(startx, starty);
for ( ; node != NULL; node = (ffnode_t*)dequeue(&head)) {
if (is_color(image, node->x, node->y, target)) {
ffnode_t *west = node, *east = node;
recolor(image, node->x, node->y, replacement);
/* 1. move w to the west until the color of the node to the west
no longer matches target */
...
}
}
}
Isn't there a proof somewhere that all recursive functions can be implemented as an iterative function by using local data to mimic a stack? You could probably use std::vector to create stack-like behavior of the algorithm without blowing the stack since it will use the heap.
EDIT: I noticed you are using C, so instead of std::vector, you could just implement similar behavior via realloc as you need to add more elements to your local "stack" of whatever data structure you would use.
I do not know if my answer is perfectly relevant to the question you put, but hereafter I propose my C version of the Flood-Fill algorithm, which does not use recursive calls.
1-11-2017: NEW-VERSION; SUCCESFULLY TESTED WITH TWO BITMAPS.
It uses only a queue of the offsets of the new points, it works on the window: WinnOffs-(WinDimX,WinDimY) of the double-buffer: *VBuffer (copy of the screen or image) and, optionally, it write a mask of the flood-fill's result (*ExtraVBuff).
ExtraVBuff must be filled it with 0 before the call (if you don't need a mask you may set ExtraVBuff= NULL); using it after call you can do gradient floodfill or other painting effects. NewFloodFill works with 32 Bit per Pixel and it is a C function. I've reinvented this algorithm in 1991 (I wrote his in Pascal), but now it works in C with 32 Bit per Pixel; also not uses any functions calls, does only a division after each "pop" from queue, and never overflows the queue, that, if it is sized in the right way (about 1/4 of the pixels of the image), it allows always to fill correctly any area; I show before the c-function (FFILL.C), after the test program (TEST.C):
#define IMAGE_WIDTH 1024
#define IMAGE_HEIGHT 768
#define IMAGE_SIZE IMAGE_WIDTH*IMAGE_HEIGHT
#define QUEUE_MAX IMAGE_SIZE/4
typedef int T_Queue[QUEUE_MAX];
typedef int T_Image[IMAGE_SIZE];
void NewFloodFill(int X,
int Y,
int Color,
int BuffDimX,
int WinOffS,
int WinDimX,
int WinDimY,
T_Image VBuffer,
T_Image ExtraVBuff,
T_Queue MyQueue)
/* Replaces all pixels adjacent to the first pixel and equal to this; */
/* if ExtraVBuff == NULL writes to *VBuffer (eg BUFFER of 786432 Pixel),*/
/* otherwise prepare a mask by writing on *ExtraVBuff (such BUFFER must */
/* always have the same size as *VBuffer (it must be initialized to 0)).*/
/* X,Y: Point coordinates' of origin of the flood-fill. */
/* WinOffS: Writing start offset on *VBuffer and *ExtraVBuff. */
/* BuffDimX: Width, in number of Pixel (int), of each buffer. */
/* WinDimX: Width, in number of Pixel (int), of the window. */
/* Color: New color that replace all_Pixel == origin's_point. */
/* WinDimY: Height, in number of Pixel (int), of the window. */
/* VBuffer: Pointer to the primary buffer. */
/* ExtraVBuff: Pointer to the mask buffer (can be = NULL). */
/* MyQueue: Pointer to the queue, containing the new-points' offsets*/
{
int VBuffCurrOffs=WinOffS+X+Y*BuffDimX;
int PixelIn=VBuffer[VBuffCurrOffs];
int QueuePnt=0;
int *TempAddr=((ExtraVBuff) ? ExtraVBuff : VBuffer);
int TempOffs1;
int TempX1;
int TempX2;
char FLAG;
if (0<=X && X<WinDimX && 0<=Y && Y<WinDimY) do
{
/* Fill to left the current line */
TempX2=X;
while (X>=0 && PixelIn==VBuffer[VBuffCurrOffs])
{
TempAddr[VBuffCurrOffs--]=Color;
--X;
}
TempOffs1=VBuffCurrOffs+1;
TempX1=X+1;
/* Fill to right the current line */
VBuffCurrOffs+=TempX2-X;
X=TempX2;
while (X+1<WinDimX && PixelIn==VBuffer[VBuffCurrOffs+1])
{
++X;
TempAddr[++VBuffCurrOffs]=Color;
}
TempX2=X;
/* Backward scan of the previous line; puts new points offset in Queue[] */
if (Y>0)
{
FLAG=1;
VBuffCurrOffs-=BuffDimX;
while (X-->=TempX1)
{
if (PixelIn!=VBuffer[VBuffCurrOffs] ||
ExtraVBuff && Color==ExtraVBuff[VBuffCurrOffs])
FLAG=1;
else
if (FLAG)
{
FLAG=0;
if (QueuePnt<QUEUE_MAX)
MyQueue[QueuePnt++]=VBuffCurrOffs;
}
--VBuffCurrOffs;
}
}
/* Forward scan of the next line; puts new points offset in Queue[] */
if (Y<WinDimY-1)
{
FLAG=1;
VBuffCurrOffs=TempOffs1+BuffDimX;
X=TempX1;
while (X++<=TempX2)
{
if (PixelIn!=VBuffer[VBuffCurrOffs] ||
ExtraVBuff && Color==ExtraVBuff[VBuffCurrOffs])
FLAG=1;
else
if (FLAG)
{
FLAG=0;
if (QueuePnt<QUEUE_MAX)
MyQueue[QueuePnt++]=VBuffCurrOffs;
}
++VBuffCurrOffs;
}
}
/* Gets a new point offset from Queue[] */
if (--QueuePnt>=0)
{
VBuffCurrOffs=MyQueue[QueuePnt];
TempOffs1=VBuffCurrOffs-WinOffS;
X=TempOffs1%BuffDimX;
Y=TempOffs1/BuffDimX;
}
/* Repeat the main cycle until the Queue[] is not empty */
} while (QueuePnt>=0);
}
Here there is the test program:
#include <stdio.h>
#include <malloc.h>
#include "ffill.c"
#define RED_COL 0xFFFF0000
#define WIN_LEFT 52
#define WIN_TOP 48
#define WIN_WIDTH 920
#define WIN_HEIGHT 672
#define START_LEFT 0
#define START_TOP 671
#define BMP_HEADER_SIZE 54
typedef char T_Image_Header[BMP_HEADER_SIZE];
void main(void)
{
T_Image_Header bmpheader;
T_Image *image;
T_Image *mask;
T_Queue *MyQueue;
FILE *stream;
char *filename1="ffill1.bmp";
char *filename2="ffill2.bmp";
char *filename3="ffill3.bmp";
int bwritten;
int bread;
image=malloc(sizeof(*image));
mask=malloc(sizeof(*mask));
MyQueue=malloc(sizeof(*MyQueue));
stream=fopen(filename1,"rb");
bread=fread(&bmpheader, 1, BMP_HEADER_SIZE, stream);
bread=fread((char *)image, 1, IMAGE_SIZE<<2, stream);
fclose(stream);
memset(mask,0,IMAGE_SIZE<<2);
NewFloodFill(START_LEFT,
START_TOP,
RED_COL,
IMAGE_WIDTH,
IMAGE_WIDTH*WIN_TOP+WIN_LEFT,
WIN_WIDTH,
WIN_HEIGHT,
*image,
NULL,
*MyQueue);
stream=fopen(filename2,"wb+");
bwritten=fwrite(&bmpheader, 1, BMP_HEADER_SIZE, stream);
bwritten=fwrite((char *)image, 1, IMAGE_SIZE<<2, stream);
fclose(stream);
stream=fopen(filename3,"wb+");
bwritten=fwrite(&bmpheader, 1, BMP_HEADER_SIZE, stream);
bwritten=fwrite((char *)mask, 1, IMAGE_SIZE<<2, stream);
fclose(stream);
free(MyQueue);
free(mask);
free(image);
}
I've used, for the input of the test program shown, the follow Windows uncompressed .BMP image (ffill1.bmp):
Filled, by the test program shown, as follows (ffill2.bmp):
Using "mask" instead of NULL, the output bitmap is (ffill3.bmp):
You can convert any recursive algorithm to iterative by creating an explicit stack or queue and loading work onto it/pulling it off.
All you need is to choose a nice, compact representation of the work to be done. Worst case: create a struct holding the arguments you would normally pass to the recursive version...
We noticed that our floodfill implementation on 3d volumes was consuming way much memory; so we modified the code in the following ways (there was a vast improvement):
Create a sphere of radius = 10 voxs around the starting point, and mark all the voxels within that radius as "to be visited"
If the current voxel > threshold, insert 1.
Go to the neighbors [+1, -1, 0] (also check that one doesn't revisit any voxel), if the neighbor.getVoxVal = 0 (the initialization value for the target volume), then it falls at the boundary of the sphere, insert the coordinates in a different stack. (this would be the starting point for our next sphere)
radius = radius + 10 (voxels)
So at a time, our floodfill is working on a concentric sphere and filling things up, which is a part of the entire volume, and as I said, this has reduced the memory consumption drastically, but I am still searching for an implementation/idea that would be better.
I have a non-recursive flood fill, but I won't post it because it's the solution to a homework assignment. But here's a hint: depth-first search, which is the natural algorithm, uses far more auxiliary space than a breadth-first search. Here's what I wrote at the time (suitably expurgated):
I dare not try depth-first search by simple recursion; the depth of recursion is limited only by REDACTED, and my experiments show that an PROBLEM REDACTED could nevertheless require a stack depth of over a million. So I put the stack in an auxiliary data structure. Using an explicit stack actually makes it easy to try breadth-first search as well, and it turns out that breadth-first search can use forty times less space than depth-first search.
For my data structure I used the Seq_T from Dave Hanson's C Interfaces and Implementations; changing from depth-first to breadth-first requires changing just one function call.
You can quickly convert a recursive flood fill into an ultra-performant pseudo-recursive... Don't edit the lines,just add new lines:
place the recursive function in an XY loop for added structure.
record the found neighbors to a "found neighbors array"
instead of memory, so you will start packing the 4-16-64 style tree of the recursion into an XY array. memory usage goes from 1 gygabyte to 2 megabytes.
Also use a 2D array called "filled neighbors array"... abort the function for any pixels marked as filled in the "filled neighbors array", this uses 2 instructions for every duplicate, 20 instructions for every floodfill operation, and it iteratively fills leftwards and upwards like dominoes, insanely quickly.
1024x1024 uses about 1million *20 instructions which is 0.1 seconds for a single core.
I achieve 9 million filled pixels per second on an i7 in this way, i have a video as proof, and a blog page with code and theory explanations:
www.youtube.com/watch?v=4hQ1wA4Sl4c
and here is a page where I attempted to explain how it works.
http://unity3dmc.blogspot.com/2017/02/ultimate-3d-floodfill-scanline.html?m=1
And the code.
Recursions would be the fastest if they could stay organized.
If you recurse through a grid of data (image) you can store the processing of the recursions in grid format too, because the processed steps represent pixels from the grid, rather than explode the results into a tree format.
Below is my BFS based iterative c++ method for the flood fill problem:
// M is the input matrix, with every entry(pixel) have a color
// represented with an integer value.
// (x, y) row and column of seed point respectively
// k: The new color to fill the seed and its adjacent pixels
void floodFill(vector<vector<int>> &M, int x, int y, int k) {
queue<pair<int, int>> nodeQ;
nodeQ.push({x, y});
int oldCol = M[x][y];
while(!nodeQ.empty()) {
pair<int, int> currNode = nodeQ.front();
nodeQ.pop();
if(M[currNode.first][currNode.second] == oldCol) {
M[currNode.first][currNode.second] = k;
if(currNode.first > 0) nodeQ.push({currNode.first-1, currNode.second});
if(currNode.first < (M.size()-1)) nodeQ.push({currNode.first+1, currNode.second});
if(currNode.second > 0) nodeQ.push({currNode.first, currNode.second-1});
if(currNode.second < (M[0].size()-1)) nodeQ.push({currNode.first, currNode.second+1});
}
}
}
Here is a guide for a non-recursive routine which completes 10 million pixels per second: it's called marching-floodfills, what happens if you march the previously recursive routine forwards in a X-Y loop.
write your own memory, a 2D array to record verified spaces, and another array which records the complete filled image, and read and write to them using this loop system... it averages 20 instructions per pixel. i dealt with 2 billion voxel graphs at 5 million Voxels per second using above video logic.
I found this fill by Paul Heckbert to be the simplest non-recursive C implementation:
/*
* A Seed Fill Algorithm
* by Paul Heckbert
* from "Graphics Gems", Academic Press, 1990
*
* user provides pixelread() and pixelwrite() routines
*/
/*
* fill.c : simple seed fill program
* Calls pixelread() to read pixels, pixelwrite() to write pixels.
*
* Paul Heckbert 13 Sept 1982, 28 Jan 1987
*/
typedef struct { /* window: a discrete 2-D rectangle */
int x0, y0; /* xmin and ymin */
int x1, y1; /* xmax and ymax (inclusive) */
} Window;
typedef int Pixel; /* 1-channel frame buffer assumed */
Pixel pixelread(int x, int y);
void pixelwrite(int x, int y, Pixel p);
typedef struct {short y, xl, xr, dy;} Segment;
/*
* Filled horizontal segment of scanline y for xl<=x<=xr.
* Parent segment was on line y-dy. dy=1 or -1
*/
#define MAX 10000 /* max depth of stack */
#define PUSH(Y, XL, XR, DY) /* push new segment on stack */ \
if (sp<stack+MAX && Y+(DY)>=win->y0 && Y+(DY)<=win->y1) \
{sp->y = Y; sp->xl = XL; sp->xr = XR; sp->dy = DY; sp++;}
#define POP(Y, XL, XR, DY) /* pop segment off stack */ \
{sp--; Y = sp->y+(DY = sp->dy); XL = sp->xl; XR = sp->xr;}
/*
* fill: set the pixel at (x,y) and all of its 4-connected neighbors
* with the same pixel value to the new pixel value nv.
* A 4-connected neighbor is a pixel above, below, left, or right of a pixel.
*/
void fill(x, y, win, nv)
int x, y; /* seed point */
Window *win; /* screen window */
Pixel nv; /* new pixel value */
{
int l, x1, x2, dy;
Pixel ov; /* old pixel value */
Segment stack[MAX], *sp = stack; /* stack of filled segments */
ov = pixelread(x, y); /* read pv at seed point */
if (ov==nv || x<win->x0 || x>win->x1 || y<win->y0 || y>win->y1) return;
PUSH(y, x, x, 1); /* needed in some cases */
PUSH(y+1, x, x, -1); /* seed segment (popped 1st) */
while (sp>stack) {
/* pop segment off stack and fill a neighboring scan line */
POP(y, x1, x2, dy);
/*
* segment of scan line y-dy for x1<=x<=x2 was previously filled,
* now explore adjacent pixels in scan line y
*/
for (x=x1; x>=win->x0 && pixelread(x, y)==ov; x--)
pixelwrite(x, y, nv);
if (x>=x1) goto skip;
l = x+1;
if (l<x1) PUSH(y, l, x1-1, -dy); /* leak on left? */
x = x1+1;
do {
for (; x<=win->x1 && pixelread(x, y)==ov; x++)
pixelwrite(x, y, nv);
PUSH(y, l, x-1, dy);
if (x>x2+1) PUSH(y, x2+1, x-1, -dy); /* leak on right? */
skip: for (x++; x<=x2 && pixelread(x, y)!=ov; x++);
l = x;
} while (x<=x2);
}
}
source: https://github.com/erich666/GraphicsGems/blob/master/gems/SeedFill.c

Resources