Having some difficulty troubleshooting code I wrote in C to perform a logistic regression. While it seems to work on smaller, semi-randomized datasets, it stops working (e.g. assigning proper probabilities of belonging to class 1) at around the point where I pass 43,500 observations (determined by tweaking the number of observations created. When creating the 150 features used in the code, I do create the first two as a function of the number of observations, so I'm not sure if maybe that's the issue here, though I am using double precision. Maybe there's an overflow somewhere in the code?
The below code should be self-contained; it generates m=50,000 observations with n=150 features. Setting m below 43,500 should return "Percent class 1: 0.250000", setting to 44,000 or above will return "Percent class 1: 0.000000", regardless of what max_iter (number of times we sample m observations) is set to.
The first feature is set to 1.0 divided by the total number of observations, if class 0 (first 75% of observations), or the index of the observation divided by the total number of observations otherwise.
The second feature is just index divided by total number of observations.
All other features are random.
The logistic regression is intended to use stochastic gradient descent, randomly selecting an observation index, computing the gradient of the loss with the predicted y using current weights, and updating weights with the gradient and learning rate (eta).
Using the same initialization with Python and NumPy, I still get the proper results, even above 50,000 observations.
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <time.h>
// Compute z = w * x + b
double dlc( int n, double *X, double *coef, double intercept )
{
double y_pred = intercept;
for (int i = 0; i < n; i++)
{
y_pred += X[i] * coef[i];
}
return y_pred;
}
// Compute y_hat = 1 / (1 + e^(-z))
double sigmoid( int n, double alpha, double *X, double *coef, double beta, double intercept )
{
double y_pred;
y_pred = dlc(n, X, coef, intercept);
y_pred = 1.0 / (1.0 + exp(-y_pred));
return y_pred;
}
// Stochastic gradient descent
void sgd( int m, int n, double *X, double *y, double *coef, double *intercept, double eta, int max_iter, int fit_intercept, int random_seed )
{
double *gradient_coef, *X_i;
double y_i, y_pred, resid;
int idx;
double gradient_intercept = 0.0, alpha = 1.0, beta = 1.0;
X_i = (double *) malloc (n * sizeof(double));
gradient_coef = (double *) malloc (n * sizeof(double));
for ( int i = 0; i < n; i++ )
{
coef[i] = 0.0;
gradient_coef[i] = 0.0;
}
*intercept = 0.0;
srand(random_seed);
for ( int epoch = 0; epoch < max_iter; epoch++ )
{
for ( int run = 0; run < m; run++ )
{
// Randomly sample an observation
idx = rand() % m;
for ( int i = 0; i < n; i++ )
{
X_i[i] = X[n*idx+i];
}
y_i = y[idx];
// Compute y_hat
y_pred = sigmoid( n, alpha, X_i, coef, beta, *intercept );
resid = -(y_i - y_pred);
// Compute gradients and adjust weights
for (int i = 0; i < n; i++)
{
gradient_coef[i] = X_i[i] * resid;
coef[i] -= eta * gradient_coef[i];
}
if ( fit_intercept == 1 )
{
*intercept -= eta * resid;
}
}
}
}
int main(void)
{
double *X, *y, *coef, *y_pred;
double intercept;
double eta = 0.05;
double alpha = 1.0, beta = 1.0;
long m = 50000;
long n = 150;
int max_iter = 20;
long class_0 = (long)(3.0 / 4.0 * (double)m);
double pct_class_1 = 0.0;
clock_t test_start;
clock_t test_end;
double test_time;
printf("Constructing variables...\n");
X = (double *) malloc (m * n * sizeof(double));
y = (double *) malloc (m * sizeof(double));
y_pred = (double *) malloc (m * sizeof(double));
coef = (double *) malloc (n * sizeof(double));
// Initialize classes
for (int i = 0; i < m; i++)
{
if (i < class_0)
{
y[i] = 0.0;
}
else
{
y[i] = 1.0;
}
}
// Initialize observation features
for (int i = 0; i < m; i++)
{
if (i < class_0)
{
X[n*i] = 1.0 / (double)m;
}
else
{
X[n*i] = (double)i / (double)m;
}
X[n*i + 1] = (double)i / (double)m;
for (int j = 2; j < n; j++)
{
X[n*i + j] = (double)(rand() % 100) / 100.0;
}
}
// Fit weights
printf("Running SGD...\n");
test_start = clock();
sgd( m, n, X, y, coef, &intercept, eta, max_iter, 1, 42 );
test_end = clock();
test_time = (double)(test_end - test_start) / CLOCKS_PER_SEC;
printf("Time taken: %f\n", test_time);
// Compute y_hat and share of observations predicted as class 1
printf("Making predictions...\n");
for ( int i = 0; i < m; i++ )
{
y_pred[i] = sigmoid( n, alpha, &X[i*n], coef, beta, intercept );
}
printf("Printing results...\n");
for ( int i = 0; i < m; i++ )
{
//printf("%f\n", y_pred[i]);
if (y_pred[i] > 0.5)
{
pct_class_1 += 1.0;
}
// Troubleshooting print
if (i < 10 || i > m - 10)
{
printf("%g\n", y_pred[i]);
}
}
printf("Percent class 1: %f", pct_class_1 / (double)m);
return 0;
}
For reference, here is my (presumably) equivalent Python code, which returns the correct percent of identified classes at more than 50,000 observations:
import numpy as np
import time
def sigmoid(x):
return 1 / (1 + np.exp(-x))
class LogisticRegressor:
def __init__(self, eta, init_runs, fit_intercept=True):
self.eta = eta
self.init_runs = init_runs
self.fit_intercept = fit_intercept
def fit(self, x, y):
m, n = x.shape
self.coef = np.zeros((n, 1))
self.intercept = np.zeros((1, 1))
for epoch in range(self.init_runs):
for run in range(m):
idx = np.random.randint(0, m)
x_i = x[idx:idx+1, :]
y_i = y[idx]
y_pred_i = sigmoid(x_i.dot(self.coef) + self.intercept)
gradient_w = -(x_i.T * (y_i - y_pred_i))
self.coef -= self.eta * gradient_w
if self.fit_intercept:
gradient_b = -(y_i - y_pred_i)
self.intercept -= self.eta * gradient_b
def predict_proba(self, x):
m, n = x.shape
y_pred = np.ones((m, 2))
y_pred[:,1:2] = sigmoid(x.dot(self.coef) + self.intercept)
y_pred[:,0:1] -= y_pred[:,1:2]
return y_pred
def predict(self, x):
return np.round(sigmoid(x.dot(self.coef) + self.intercept))
m = 50000
n = 150
class1 = int(3.0 / 4.0 * m)
X = np.random.rand(m, n)
y = np.zeros((m, 1))
for obs in range(m):
if obs < class1:
continue
else:
y[obs,0] = 1
for obs in range(m):
if obs < class1:
X[obs, 0] = 1.0 / float(m)
else:
X[obs, 0] = float(obs) / float(m)
X[obs, 1] = float(obs) / float(m)
logit = LogisticRegressor(0.05, 20)
start_time = time.time()
logit.fit(X, y)
end_time = time.time()
print(round(end_time - start_time, 2))
y_pred = logit.predict(X)
print("Percent:", y_pred.sum() / len(y_pred))
The issue is here:
// Randomly sample an observation
idx = rand() % m;
... in light of the fact that the OP's RAND_MAX is 32767. This is exacerbated by the fact that all of the class 0 observations are at the end.
All samples will be drawn from the first 32768 observations, and when the total number of observations is greater than that, the proportion of class 0 observations among those that can be sampled is less than 0.25. At 43691 total observations, there are no class 0 observations among those that can be sampled.
As a secondary issue, rand() % m does not yield a wholly uniform distribution if m does not evenly divide RAND_MAX + 1, though the effect of this issue will be much more subtle.
Bottom line: you need a better random number generator.
At minimum, you could consider combining the bits from two calls to rand() to yield an integer with sufficient range, but you might want to consider getting a third-party generator. There are several available.
Note: OP reports "m=50,000 observations with n=150 features.", so perhaps this is not the issue for OP, but I'll leave this answer up for reference when OP tries larger tasks.
A potential issue:
long overflow
m * n * sizeof(double) risks overflow when long is 32-bit and m*n > LONG_MAX (or about 46,341 if m, n are the same).
OP does report
A first step is to perform the multiplication using size_t math where we gain at least 1 more bit in the calculation.
// m * n * sizeof(double)
sizeof(double) * m * n
Yet unless OP's size_t is more than 32-bit, we still have trouble.
IAC, I recommend to use size_t for array sizing and indexing.
Check allocations for failure too.
Since RAND_MAX may be too small and array indexing should be done using size_t math, consider a helper function to generate a random index over the entire size_t range.
// idx = rand() % m;
size_t idx = rand_size_t() % (size_t)m;
If stuck with the standard rand(), below is a helper function to extend its range as needed.
It uses the real nifty IMAX_BITS(m).
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
// https://stackoverflow.com/a/4589384/2410359
/* Number of bits in inttype_MAX, or in any (1<<k)-1 where 0 <= k < 2040 */
#define IMAX_BITS(m) ((m)/((m)%255+1) / 255%255*8 + 7-86/((m)%255+12))
// Test that RAND_MAX is a power of 2 minus 1
_Static_assert((RAND_MAX & 1) && ((RAND_MAX/2 + 1) & (RAND_MAX/2)) == 0, "RAND_MAX is not a Mersenne number");
#define RAND_MAX_WIDTH (IMAX_BITS(RAND_MAX))
#define SIZE_MAX_WIDTH (IMAX_BITS(SIZE_MAX))
size_t rand_size_t(void) {
size_t index = (size_t) rand();
for (unsigned i = RAND_MAX_WIDTH; i < SIZE_MAX_WIDTH; i += RAND_MAX_WIDTH) {
index <<= RAND_MAX_WIDTH;
index ^= (size_t) rand();
}
return index;
}
Further considerations can replace the rand_size_t() % (size_t)m with a more uniform distribution.
As has been determined elsewhere, the problem is due to the implementation's RAND_MAX value being too small.
Assuming 32-bit ints, a slightly better PRNG function can be implemented in the code, such as this C implementation of the minstd_rand() function from C++:
#define MINSTD_RAND_MAX 2147483646
// Code assumes `int` is at least 32 bits wide.
static unsigned int minstd_seed = 1;
static void minstd_srand(unsigned int seed)
{
seed %= 2147483647;
// zero seed is bad!
minstd_seed = seed ? seed : 1;
}
static int minstd_rand(void)
{
minstd_seed = (unsigned long long)minstd_seed * 48271 % 2147483647;
return (int)minstd_seed;
}
Another problem is that expressions of the form rand() % m produce a biased result when m does not divide (unsigned int)RAND_MAX + 1. Here is an unbiased function that returns a random integer from 0 to le inclusive, making use of the minstd_rand() function defined earlier:
static int minstd_rand_max(int le)
{
int r;
if (le < 0)
{
r = le;
}
else if (le >= MINSTD_RAND_MAX)
{
r = minstd_rand();
}
else
{
int rm = MINSTD_RAND_MAX - le + MINSTD_RAND_MAX % (le + 1);
while ((r = minstd_rand()) > rm)
{
}
r /= (rm / (le + 1) + 1);
}
return r;
}
(Actually, it does still have a very small bias because minstd_rand() will never return 0.)
For example, replace rand() % 100 with minstd_rand_max(99), and replace rand() % m with minstd_rand_max(m - 1). Also replace srand(random_seed) with minstd_srand(random_seed).
I am using Bresenham's circle algorithm for fast circle drawing. However, I also want to (at the request of the user) draw a filled circle.
Is there a fast and efficient way of doing this? Something along the same lines of Bresenham?
The language I am using is C.
Having read the Wikipedia page on Bresenham's (also 'Midpoint') circle algorithm, it would appear that the easiest thing to do would be to modify its actions, such that instead of
setPixel(x0 + x, y0 + y);
setPixel(x0 - x, y0 + y);
and similar, each time you instead do
lineFrom(x0 - x, y0 + y, x0 + x, y0 + y);
That is, for each pair of points (with the same y) that Bresenham would you have you plot, you instead connect with a line.
Just use brute force. This method iterates over a few too many pixels, but it only uses integer multiplications and additions. You completely avoid the complexity of Bresenham and the possible bottleneck of sqrt.
for(int y=-radius; y<=radius; y++)
for(int x=-radius; x<=radius; x++)
if(x*x+y*y <= radius*radius)
setpixel(origin.x+x, origin.y+y);
Here's a C# rough guide (shouldn't be that hard to get the right idea for C) - this is the "raw" form without using Bresenham to eliminate repeated square-roots.
Bitmap bmp = new Bitmap(200, 200);
int r = 50; // radius
int ox = 100, oy = 100; // origin
for (int x = -r; x < r ; x++)
{
int height = (int)Math.Sqrt(r * r - x * x);
for (int y = -height; y < height; y++)
bmp.SetPixel(x + ox, y + oy, Color.Red);
}
bmp.Save(#"c:\users\dearwicker\Desktop\circle.bmp");
You can use this:
void DrawFilledCircle(int x0, int y0, int radius)
{
int x = radius;
int y = 0;
int xChange = 1 - (radius << 1);
int yChange = 0;
int radiusError = 0;
while (x >= y)
{
for (int i = x0 - x; i <= x0 + x; i++)
{
SetPixel(i, y0 + y);
SetPixel(i, y0 - y);
}
for (int i = x0 - y; i <= x0 + y; i++)
{
SetPixel(i, y0 + x);
SetPixel(i, y0 - x);
}
y++;
radiusError += yChange;
yChange += 2;
if (((radiusError << 1) + xChange) > 0)
{
x--;
radiusError += xChange;
xChange += 2;
}
}
}
Great ideas here!
Since I'm at a project that requires many thousands of circles to be drawn, I have evaluated all suggestions here (and improved a few by precomputing the square of the radius):
http://quick-bench.com/mwTOodNOI81k1ddaTCGH_Cmn_Ag
The Rev variants just have x and y swapped because consecutive access along the y axis are faster with the way my grid/canvas structure works.
The clear winner is Daniel Earwicker's method ( DrawCircleBruteforcePrecalc ) that precomputes the Y value to avoid unnecessary radius checks. Somewhat surprisingly that negates the additional computation caused by the sqrt call.
Some comments suggest that kmillen's variant (DrawCircleSingleLoop) that works with a single loop should be very fast, but it's the slowest here. I assume that is because of all the divisions. But perhaps I have adapted it wrong to the global variables in that code. Would be great if someone takes a look.
EDIT: After looking for the first time since college years at some assembler code, I managed find that the final additions of the circle's origin are a culprit.
Precomputing those, I improved the fastest method by a factor of another 3.7-3.9 according to the bench!
http://quick-bench.com/7ZYitwJIUgF_OkDUgnyMJY4lGlA
Amazing.
This being my code:
for (int x = -radius; x < radius ; x++)
{
int hh = (int)std::sqrt(radius_sqr - x * x);
int rx = center_x + x;
int ph = center_y + hh;
for (int y = center_y-hh; y < ph; y++)
canvas[rx][y] = 1;
}
I like palm3D's answer. For being brute force, this is an amazingly fast solution. There are no square root or trigonometric functions to slow it down. Its one weakness is the nested loop.
Converting this to a single loop makes this function almost twice as fast.
int r2 = r * r;
int area = r2 << 2;
int rr = r << 1;
for (int i = 0; i < area; i++)
{
int tx = (i % rr) - r;
int ty = (i / rr) - r;
if (tx * tx + ty * ty <= r2)
SetPixel(x + tx, y + ty, c);
}
This single loop solution rivals the efficiency of a line drawing solution.
int r2 = r * r;
for (int cy = -r; cy <= r; cy++)
{
int cx = (int)(Math.Sqrt(r2 - cy * cy) + 0.5);
int cyy = cy + y;
lineDDA(x - cx, cyy, x + cx, cyy, c);
}
palm3D's brute-force algorithm I found to be a good starting point. This method uses the same premise, however it includes a couple of ways to skip checking most of the pixels.
First, here's the code:
int largestX = circle.radius;
for (int y = 0; y <= radius; ++y) {
for (int x = largestX; x >= 0; --x) {
if ((x * x) + (y * y) <= (circle.radius * circle.radius)) {
drawLine(circle.center.x - x, circle.center.x + x, circle.center.y + y);
drawLine(circle.center.x - x, circle.center.x + x, circle.center.y - y);
largestX = x;
break; // go to next y coordinate
}
}
}
Next, the explanation.
The first thing to note is that if you find the minimum x coordinate that is within the circle for a given horizontal line, you immediately know the maximum x coordinate.
This is due to the symmetry of the circle. If the minimum x coordinate is 10 pixels ahead of the left of the bounding box of the circle, then the maximum x is 10 pixels behind the right of the bounding box of the circle.
The reason to iterate from high x values to low x values, is that the minimum x value will be found with less iterations. This is because the minimum x value is closer to the left of the bounding box than the centre x coordinate of the circle for most lines, due to the circle being curved outwards, as seen on this image
The next thing to note is that since the circle is also symmetric vertically, each line you find gives you a free second line to draw, each time you find a line in the top half of the circle, you get one on the bottom half at the radius-y y coordinate. Therefore, when any line is found, two can be drawn and only the top half of the y values needs to be iterated over.
The last thing to note is that is that if you start from a y value that is at the centre of the circle and then move towards the top for y, then the minimum x value for each next line must be closer to the centre x coordinate of the circle than the last line. This is also due to the circle curving closer towards the centre x value as you go up the circle. Here is a visual on how that is the case.
In summary:
If you find the minimum x coordinate of a line, you get the maximum x coordinate for free.
Every line you find to draw on the top half of the circle gives you a line on the bottom half of the circle for free.
Every minimum x coordinate has to be closer to the centre of the circle than the previous x coordinate for each line when iterating from the centre y coordinate to the top.
You can also store the value of (radius * radius), and also (y * y) instead of calculating them
multiple times.
Here's how I'm doing it:
I'm using fixed point values with two bits precision (we have to manage half points and square values of half points)
As mentionned in a previous answer, I'm also using square values instead of square roots.
First, I'm detecting border limit of my circle in a 1/8th portion of the circle. I'm using symetric of these points to draw the 4 "borders" of the circle. Then I'm drawing the square inside the circle.
Unlike the midpoint circle algorith, this one will work with even diameters (and with real numbers diameters too, with some little changes).
Please forgive me if my explanations were not clear, I'm french ;)
void DrawFilledCircle(int circleDiameter, int circlePosX, int circlePosY)
{
const int FULL = (1 << 2);
const int HALF = (FULL >> 1);
int size = (circleDiameter << 2);// fixed point value for size
int ray = (size >> 1);
int dY2;
int ray2 = ray * ray;
int posmin,posmax;
int Y,X;
int x = ((circleDiameter&1)==1) ? ray : ray - HALF;
int y = HALF;
circlePosX -= (circleDiameter>>1);
circlePosY -= (circleDiameter>>1);
for (;; y+=FULL)
{
dY2 = (ray - y) * (ray - y);
for (;; x-=FULL)
{
if (dY2 + (ray - x) * (ray - x) <= ray2) continue;
if (x < y)
{
Y = (y >> 2);
posmin = Y;
posmax = circleDiameter - Y;
// Draw inside square and leave
while (Y < posmax)
{
for (X = posmin; X < posmax; X++)
setPixel(circlePosX+X, circlePosY+Y);
Y++;
}
// Just for a better understanding, the while loop does the same thing as:
// DrawSquare(circlePosX+Y, circlePosY+Y, circleDiameter - 2*Y);
return;
}
// Draw the 4 borders
X = (x >> 2) + 1;
Y = y >> 2;
posmax = circleDiameter - X;
int mirrorY = circleDiameter - Y - 1;
while (X < posmax)
{
setPixel(circlePosX+X, circlePosY+Y);
setPixel(circlePosX+X, circlePosY+mirrorY);
setPixel(circlePosX+Y, circlePosY+X);
setPixel(circlePosX+mirrorY, circlePosY+X);
X++;
}
// Just for a better understanding, the while loop does the same thing as:
// int lineSize = circleDiameter - X*2;
// Upper border:
// DrawHorizontalLine(circlePosX+X, circlePosY+Y, lineSize);
// Lower border:
// DrawHorizontalLine(circlePosX+X, circlePosY+mirrorY, lineSize);
// Left border:
// DrawVerticalLine(circlePosX+Y, circlePosY+X, lineSize);
// Right border:
// DrawVerticalLine(circlePosX+mirrorY, circlePosY+X, lineSize);
break;
}
}
}
void DrawSquare(int x, int y, int size)
{
for( int i=0 ; i<size ; i++ )
DrawHorizontalLine(x, y+i, size);
}
void DrawHorizontalLine(int x, int y, int width)
{
for(int i=0 ; i<width ; i++ )
SetPixel(x+i, y);
}
void DrawVerticalLine(int x, int y, int height)
{
for(int i=0 ; i<height ; i++ )
SetPixel(x, y+i);
}
To use non-integer diameter, you can increase precision of fixed point or use double values.
It should even be possible to make a sort of anti-alias depending on the difference between dY2 + (ray - x) * (ray - x) and ray2 (dx² + dy² and r²)
If you want a fast algorithm, consider drawing a polygon with N sides, the higher is N, the more precise will be the circle.
I would just generate a list of points and then use a polygon draw function for the rendering.
It may not be the algorithm yo are looking for and not the most performant one,
but I always do something like this:
void fillCircle(int x, int y, int radius){
// fill a circle
for(int rad = radius; rad >= 0; rad--){
// stroke a circle
for(double i = 0; i <= PI * 2; i+=0.01){
int pX = x + rad * cos(i);
int pY = y + rad * sin(i);
drawPoint(pX, pY);
}
}
}
The following two methods avoid the repeated square root calculation by drawing multiple parts of the circle at once and should therefore be quite fast:
void circleFill(const size_t centerX, const size_t centerY, const size_t radius, color fill) {
if (centerX < radius || centerY < radius || centerX + radius > width || centerY + radius > height)
return;
const size_t signedRadius = radius * radius;
for (size_t y = 0; y < radius; y++) {
const size_t up = (centerY - y) * width;
const size_t down = (centerY + y) * width;
const size_t halfWidth = roundf(sqrtf(signedRadius - y * y));
for (size_t x = 0; x < halfWidth; x++) {
const size_t left = centerX - x;
const size_t right = centerX + x;
pixels[left + up] = fill;
pixels[right + up] = fill;
pixels[left + down] = fill;
pixels[right + down] = fill;
}
}
}
void circleContour(const size_t centerX, const size_t centerY, const size_t radius, color stroke) {
if (centerX < radius || centerY < radius || centerX + radius > width || centerY + radius > height)
return;
const size_t signedRadius = radius * radius;
const size_t maxSlopePoint = ceilf(radius * 0.707106781f); //ceilf(radius * cosf(TWO_PI/8));
for (size_t i = 0; i < maxSlopePoint; i++) {
const size_t depth = roundf(sqrtf(signedRadius - i * i));
size_t left = centerX - depth;
size_t right = centerX + depth;
size_t up = (centerY - i) * width;
size_t down = (centerY + i) * width;
pixels[left + up] = stroke;
pixels[right + up] = stroke;
pixels[left + down] = stroke;
pixels[right + down] = stroke;
left = centerX - i;
right = centerX + i;
up = (centerY - depth) * width;
down = (centerY + depth) * width;
pixels[left + up] = stroke;
pixels[right + up] = stroke;
pixels[left + down] = stroke;
pixels[right + down] = stroke;
}
}
This was used in my new 3D printer Firmware, and it is proven the
fastest way for filled circle of a diameter from 1 to 43 pixel. If
larger is needed, the following memory block(or array) should be
extended following a structure I wont waste my time explaining...
If you have questions, or need larger diameter than 43, contact me, I
will help you drawing the fastest and perfect filled circles... or
Bresenham's circle drawing algorithm can be used above those
diameters, but having to fill the circle after, or incorporating the
fill into Bresenham's circle drawing algorithm, will only result in
slower fill circle than my code. I already benchmarked the different
codes, my solution is 4 to 5 times faster. As a test I have been
able to draw hundreds of filled circles of different size and colors
on a BigTreeTech tft24 1.1 running on a 1-core 72 Mhz cortex-m4
https://www.youtube.com/watch?v=7_Wp5yn3ADI
// this must be declared anywhere, as static or global
// as long as the function can access it !
uint8_t Rset[252]={
0,1,1,2,2,1,2,3,3,1,3,3,4,4,2,3,4,5,5,5,2,4,5,5,
6,6,6,2,4,5,6,6,7,7,7,2,4,5,6,7,7,8,8,8,2,5,6,7,
8,8,8,9,9,9,3,5,6,7,8,9,9,10,10,10,10,3,5,7,8,9,
9,10,10,11,11,11,11,3,5,7,8,9,10,10,11,11,12,12,
12,12,3,6,7,9,10,10,11,12,12,12,13,13,13,13,3,6,
8,9,10,11,12,12,13,13,13,14,14,14,14,3,6,8,9,10,
11,12,13,13,14,14,14,15,15,15,15,3,6,8,10,11,12,
13,13,14,14,15,15,15,16,16,16,16,4,7,8,10,11,12,
13,14,14,15,16,16,16,17,17,17,17,17,4,7,9,10,12,
13,14,14,15,16,16,17,17,17,18,18,18,18,18,4,7,9,
11,12,13,14,15,16,16,17,17,18,18,18,19,19,19,19,
19,7,9,11,12,13,15,15,16,17,18,18,19,19,20,20,20,
20,20,20,20,20,7,9,11,12,14,15,16,17,17,18,19,19
20,20,21,21,21,21,21,21,21,21};
// SOLUTION 1: (the fastest)
void FillCircle_v1(uint16_t x, uint16_t y, uint16_t r)
{
// all needed variables are created and set to their value...
uint16_t radius=(r<1) ? 1 : r ;
if (radius>21 ) {radius=21; }
uint16_t diam=(radius*2)+1;
uint16_t ymir=0, cur_y=0;
radius--; uint16_t target=(radius*radius+3*radius)/2; radius++;
// this part draws directly into the ILI94xx TFT buffer mem.
// using pointers..2 versions where you can draw
// pixels and lines with coordinates will follow
for (uint16_t yy=0; yy<diam; yy++)
{ ymir= (yy<=radius) ? yy+target : target+diam-(yy+1);
cur_y=y-radius+yy;
uint16_t *pixel=buffer_start_addr+x-Rset[ymir]+cur_y*buffer_width;
for (uint16_t xx= 0; xx<=(2*Rset[ymir]); xx++)
{ *pixel++ = CANVAS::draw_color; }}}
// SOLUTION 2: adaptable to any system that can
// add a pixel at a time: (drawpixel or add_pixel,etc_)
void FillCircle_v2(uint16_t x, uint16_t y, uint16_t r)
{
// all needed variables are created and set to their value...
uint16_t radius=(r<1) ? 1 : r ;
if (radius>21 ) {radius=21; }
uint16_t diam=(radius*2)+1;
uint16_t ymir=0, cur_y=0;
radius--; uint16_t target=(radius*radius+3*radius)/2; radius++;
for (uint16_t yy=0; yy<diam; yy++)
{ ymir= (yy<=radius) ? yy+target : target+diam-(yy+1);
cur_y=y-radius+yy;
uint16_t Pixel_x=x-Rset[ymir];
for (uint16_t xx= 0; xx<=(2*Rset[ymir]); xx++)
{ //use your add_pixel or draw_pixel here
// using those coordinates:
// X position will be... (Pixel_x+xx)
// Y position will be... (cur_y)
// and add those 3 brackets at the end
}}}
// SOLUTION 3: adaptable to any system that can draw fast
// horizontal lines
void FillCircle_v3(uint16_t x, uint16_t y, uint16_t r)
{
// all needed variables are created and set to their value...
uint16_t radius=(r<1) ? 1 : r ;
if (radius>21 ) {radius=21; }
uint16_t diam=(radius*2)+1;
uint16_t ymir=0, cur_y=0;
radius--; uint16_t target=(radius*radius+3*radius)/2; radius++;
for (uint16_t yy=0; yy<diam; yy++)
{ ymir= (yy<=radius) ? yy+target : target+diam-(yy+1);
cur_y=y-radius+yy;
uint16_t start_x=x-Rset[ymir];
uint16_t width_x=2*Rset[ymir];
// ... then use your best drawline function using those values:
// start_x: position X of the start of the line
// cur_y: position Y of the current line
// width_x: length of the line
// if you need a 2nd coordinate then :end_x=start_x+width_x
// and add those 2 brackets after !!!
}}
I did pretty much what AlegGeorge did but I changed three lines. I thought that this is faster but these are the results am I doing anything wrong? my function is called DrawBruteforcePrecalcV4. here's the code:
for (int x = 0; x < radius ; x++) // Instead of looping from -radius to radius I loop from 0 to radius
{
int hh = (int)std::sqrt(radius_sqr - x * x);
int rx = center_x + x;
int cmx = center_x - x;
int ph = center_y+hh;
for (int y = center_y-hh; y < ph; y++)
{
canvas[rx][y] = 1;
canvas[cmx][y] = 1;
}
}