I'm trying to vectorize some code.
Idea: we have a pixel(__m128 in), if any of it's elements is bigger than upper, replace entier pixel with different pixel(__m128 upper_color)
Unvectorized code that works:
if(inp[0] >= upper || inp[1] >= upper || inp[2] >= upper)
{
outp[0] = upper_color[0];
outp[1] = upper_color[1];
outp[2] = upper_color[2];
}
So far i came up with following, but (i believe so) it replaces not entire pixel, but only those components that are bigger than upper:
const __m128 pixel = _mm_load_ps(in);
const __m128 isoe = _mm_cmpge_ps(pixel, upper);
__m128 result = _mm_or_ps(_mm_andnot_ps(isoe, pixel),
_mm_and_ps(isoe, upper_color));
_mm_stream_ps(out, result);
Let's assume upper = 1,1,1 and upper_color = 1,0,0
Fourth channel is alpha, so i do not care about it.
Results:
IN: 0.5 0.3 0.7
OUT: 0.5 0.3 0.7 (Expected)
OUT: 0.5 0.3 0.7 (Recieved)
IN: 1.5 1.1 0.7
OUT: 1 0 0 (Expected)
OUT: 1 0 0.7 (Recieved)
Maybe someone could help me? Is this is even possible?
You need to compute horizontal OR. There is no horizontal OR instruction in SSE, but such operation can be simulated with 2x UNPACK + vertical OR.
const __m128 pixel = _mm_load_ps(in);
/* (p3, p2, p1, p0 ) */
__m128 isoe = _mm_cmpge_ps(pixel, upper);
/* (p3|p1, p2|p0, p3|p1, p2|p0) */
isoe = _mm_or_ps(_mm_unpacklo_ps(isoe, isoe), _mm_unpackhi_ps(isoe, isoe));
/* (p3|p2|p1|p0, p3|p2|p1|p0, p3|p2|p1|p0, p3|p2|p1|p0) */
isoe = _mm_or_ps(_mm_unpacklo_ps(isoe, isoe), _mm_unpackhi_ps(isoe, isoe));
__m128 result = _mm_or_ps(_mm_andnot_ps(isoe, pixel), _mm_and_ps(isoe, upper_color));
_mm_stream_ps(out, result);
You can use _mm_movemask_epi8 to do a horizontal OR.
#include <stdio.h>
#include <emmintrin.h>
void foo(float ina[]) {
//float ina[] = {0.5, 0.3, 0.7, 0};
float uppera[] = {1,1,1,1};
float upper_colora[] = {1,0,0,0};
float out[4];
__m128 in = _mm_load_ps(ina);
__m128 upper = _mm_load_ps(uppera);
__m128 upper_color = _mm_load_ps(upper_colora);
const __m128 pixel = _mm_load_ps(ina);
const __m128 isoe = _mm_cmpge_ps(pixel, upper);
if(_mm_movemask_epi8(_mm_castps_si128(isoe))) {
_mm_stream_ps(out, upper_color);
}
else {
_mm_stream_ps(out, in);
}
printf("%f %f %f %f\n", out[0], out[1], out[2], out[3]);
}
int main() {
float ina1[] = {0.5, 0.3, 0.7, 0}; //output 0.5 0.3 0.7 0.0
float ina2[] = {0.5, 1.1, 0.7, 0}; //output 1.0 0.0 0.0 0.0
foo(ina1);
foo(ina2);
}
Related
I am writing a code in C. Checking the roots if they satisfy the quadratic equation.
In printf, while %lf outputs 0, %e returns a very small value (between 10-15 and 10-19).
I tried to declare the variable as float or double, nothing is working when I am printing using %e. What is wrong?
Also any suggestions to improve the logic would be appreciated. I am a beginner in C. thanks.
----------------------code-------------------------------
#include <stdio.h>
#include <math.h>
struct param {
double a[11];
} x, y, z;
double slvequad(double a1, double b1, double c1) {
double quad_sol1, quad_sol2;
double dcrm = b1 * b1 - 4.0 * a1 * c1;
printf("\n ( %+0.2e, %+0.2e, %+0.2e)", a1, b1, c1);
if (a1 != 0 && dcrm >= 0) {
quad_sol1 = (-b1 + sqrt(dcrm)) / (2.0 * a1);
quad_sol2 = (-b1 - sqrt(dcrm)) / (2.0 * a1);
printf("( %+0.2e , %+0.2e ) ", quad_sol1, quad_sol2);
quadcheck(a1, b1, c1, quad_sol1, quad_sol2);
} else
if (a1 == 0 && b1 != 0) {
quad_sol1 = quad_sol2 = -c1 / b1;
printf("( %+0.2e ) ", quad_sol1);
quadcheck(a1, b1, c1, quad_sol1, quad_sol2);
} else {
printf(" No solution ");
}
}
void quadcheck(double x1, double y1, double z1, double sol1, double sol2) {
double v1 = (x1 * (sol1) * (sol1) + y1 * sol1 + z1);
double v2 = (x1 * (sol2) * (sol2) + y1 * sol2 + z1);
printf("( %+0.2e , %+0.2e ) \n ", v1, v2);
}
int main() {
struct param x = { 1e-10, 0, 1, 0, 0, 1e-35, 1, 3, 4, 1, 1.0 };
struct param y = { 2.0, 1, 0, 1, 0, 0, 4, 5, -20, 6, -1e-1 };
struct param z = { 1e-10, 5, -4, 0, 0, -1e35, 1, -7, 25, 34, 0.0025 };
int i;
int len = *(&x.a + 1) - x.a;
for (i = 0; i < len; i++) {
slvequad(x.a[i], y.a[i], z.a[i]);
}
return 0;
}
output: In format: (a1,b1,c1)(quad_sol1, quad_sol2)(v1,v1)
Technically, v1 and v2 should print 0, but it prints very small value in a few cases (using %e) as shown in the image.
The roots for x2 + 4x + 1 are irrational numbers -2 +/-sqrt(3). They cannot be represented exactly in types float or double, only approximations are stored, precise to about 17 significant digits for type double. Computing the polynomial with these approximations produce very small, yet non zero numbers.
You may want to read these questions:
Is floating point math broken?
Why are floating point numbers inaccurate?
Is floating-point == ever OK?
I've been trying to add this post-processing (taken from sebastian lague video which I am trying to convert from unity to threejs) effect that when a ray hits the ocean on my mesh (the blue):
it is colored white (just like in his video):
and everywhere else the original color is returned. But for the life of me can't seem to figure out the problem, I assume my ray origin or direction might be wrong but nothing seems to work, Here's the code that I pass to the ray Sphere intersection function and the function itself.
vec2 raySphere(vec3 centre, float radius, vec3 rayOrigin, vec3 rayDir) {
vec3 offset = rayOrigin - centre;
float a = 1.0; // set to dot(rayDir, rayDir) instead of rayDir may not be normalized
float b = 2.0 * dot(offset, rayDir);
float c = dot(offset, offset) - radius * radius;
float discriminant = b*b-4.0*a*c;
// No intersection: discriminant < 0
// 1 intersection: discriminant == 0
// 2 intersection: discriminant > 0
if(discriminant > 0.0) {
float s = sqrt(discriminant);
float dstToSphereNear = max(0.0, (-b - s) / (2.0 * a));
float dstToSphereFar = (-b + s) / (2.0 * a);
if (dstToSphereFar >= 0.0) {
return vec2(dstToSphereNear, dstToSphereFar-dstToSphereNear);
}
}
return vec2(99999999, 0.0);
}
vec4 ro = inverse(modelMatrix) * vec4(cameraPosition, 1.0);
vec3 rd = normalize(position - ro.xyz);
vec3 oceanCentre = vec3(0.0, 0.0, 0.0);
float oceanRadius = 32.0;
vec2 hitInfo = raySphere(oceanCentre, oceanRadius, ro.xyz, rd);
float dstToOcean = hitInfo.x;
float dstThroughOcean = hitInfo.y;
vec3 rayOceanIntersectPos = ro.xyz + rd * dstToOcean - oceanCentre;
// dst that view ray travels through ocean (before hitting terrain / exiting ocean)
float oceanViewDepth = min(dstThroughOcean, depth - dstToOcean);
vec4 oceanCol;
float alpha;
if(oceanViewDepth > 0.0) {
gl_FragColor = vec4(vec3(1.0), .1);
}
gl_FragColor = texture2D(tDiffuse, vUv);
Can someone help point out where I might be messing up?
Oh wow, we're in the same place while we're stuck at making these shaders. I checked your ray intersectors have small problems. But here is the cases:
What we want if case 3 happens like on your example, so the intersection are in count the problem probably come from no depth correction by doing this:
Make sure your sphere intersection max depth same as the camera.
I do suspect if the last line is the problem, try do this:
vec3 col; // Declare the color
vec2 o = sphere(ro, rd, vec3(0), 1.0); // Ocean Depth.
float oceanViewDepth = min(o.y - o.x, t - o.x);
if(depth > 0.0 && tmax > depth) {
col = originalCol;
}
if(oceanViewDepth > 0.0) {
col = vec3(1);
}
gl_FragColor = vec4(col, 1.0);
If that doesn't work for you I have some finished example for you to checkout at shadertoy
Most the rounding questions on Stack Overflow are about specific cases and, as I just made a generic rounding function, I thought I would share it.
I have a number f, a starting value a, and an increment b, and I want to “round” f to the nearest element of the set {a + b•i | i is an integer}. For example:
a = 0.0, b = 0.5: round to one of the values 0.0, 0.5, 1.0, 1.5, etc.
a = 0.25, b = 0.5: round to one of the values 0.25, 0.75, 1.25, 1.75, etc.
a = 0.21, b = 0.23: round to one of the values 0.21, 0.44, 0.67, 0.80, etc.
The standard C library has roundf() but it only rounds to the nearest integer.
How do I go about doing this?
#include <stdio.h>
#include <math.h>
float round_float(float x, float inc, float start_val)
{
return ( roundf( (x - start_val) / inc ) * inc + start_val );
}
int main(void)
{
printf("%f\n", round_float(12.522, 0.5, 0));
printf("%f\n", round_float(12.522, -0.5, -0.1));
printf("%f\n", round_float(5.318, 0.23, 125));
printf("%f\n", round_float(-12.522, 12, 5));
printf("%f\n", round_float(-12.522, 3.6, -2));
}
output:
12.500000
12.400000
5.400002
-7.000000
-12.799999
first example: round_float(10.1521, 0.5, 0)
second example: round_float(10.1521, 0.5, 0.25)
final example: round_float(10.1521, 0.23, 0.21)
Extended example
This code shows how random sample values are rounded to the nearest entry in each of three sequences specified by the starting value and the increment — using the values outlined in the question.
#include <stdio.h>
#include <math.h>
static inline float round_float(float x, float inc, float start_val)
{
return roundf((x - start_val) / inc) * inc + start_val;
}
int main(void)
{
printf("%f\n", round_float(12.522, 0.5, 0));
printf("%f\n", round_float(12.522, -0.5, -0.1));
printf("%f\n", round_float(5.318, 0.23, 125));
printf("%f\n", round_float(-12.522, 12, 5));
printf("%f\n", round_float(-12.522, 3.6, -2));
static const float samples[] =
{
-14.2751, -12.3080, -10.5320, -6.4804, -1.0859,
0.1999, 0.2099, 5.2980, 5.7819, 11.7052,
};
enum { NUM_SAMPLES = sizeof(samples) / sizeof(samples[0]) };
static const float control[][2] =
{
{ 0.00, 0.50 },
{ 0.25, 0.50 },
{ 0.21, 0.23 },
};
enum { NUM_CONTROL = sizeof(control) / sizeof(control[0]) };
for (int i = 0; i < NUM_CONTROL; i++)
{
float a = control[i][0];
float b = control[i][1];
printf("Start: %8.4f; increment %8.4f\n", a, b);
for (int j = 0; j < NUM_SAMPLES; j++)
{
printf(" Sample: %8.4f rounds to %8.4f\n",
samples[j], round_float(samples[j], b, a));
}
}
return 0;
}
Example output:
12.500000
12.400000
5.400002
-7.000000
-12.799999
Start: 0.0000; increment 0.5000
Sample: -14.2751 rounds to -14.5000
Sample: -12.3080 rounds to -12.5000
Sample: -10.5320 rounds to -10.5000
Sample: -6.4804 rounds to -6.5000
Sample: -1.0859 rounds to -1.0000
Sample: 0.1999 rounds to 0.0000
Sample: 0.2099 rounds to 0.0000
Sample: 5.2980 rounds to 5.5000
Sample: 5.7819 rounds to 6.0000
Sample: 11.7052 rounds to 11.5000
Start: 0.2500; increment 0.5000
Sample: -14.2751 rounds to -14.2500
Sample: -12.3080 rounds to -12.2500
Sample: -10.5320 rounds to -10.7500
Sample: -6.4804 rounds to -6.2500
Sample: -1.0859 rounds to -1.2500
Sample: 0.1999 rounds to 0.2500
Sample: 0.2099 rounds to 0.2500
Sample: 5.2980 rounds to 5.2500
Sample: 5.7819 rounds to 5.7500
Sample: 11.7052 rounds to 11.7500
Start: 0.2100; increment 0.2300
Sample: -14.2751 rounds to -14.2800
Sample: -12.3080 rounds to -12.2100
Sample: -10.5320 rounds to -10.6000
Sample: -6.4804 rounds to -6.4600
Sample: -1.0859 rounds to -1.1700
Sample: 0.1999 rounds to 0.2100
Sample: 0.2099 rounds to 0.2100
Sample: 5.2980 rounds to 5.2700
Sample: 5.7819 rounds to 5.7300
Sample: 11.7052 rounds to 11.7100
I'm sure a more judicious choice of sample values would make the point even clearer; those are just a collection of random numbers in the range -20 .. +20 in sorted order.
Apologies for the extra level of indent in the last example.
The preview was not showing 'code' when it was not indented two levels, and I've no idea why. (Configuration: Firefox Quantum 57.0.1 (restart pending) on macOS High Sierra 10.13.2 and SO 2017.12.22.28257)
I have the following code for finding quartiles:
#include <stdio.h>
#include <stdlib.h>
typedef struct {
double qrt[3];
double *value;
int count;
} t_data;
static void set_qrt(t_data *data, int qrt)
{
int n, e;
double d;
d = qrt * 0.25 * data->count + 0.5;
n = (int)d;
e = n != d;
data->qrt[qrt - 1] = data->value[n - 1];
if (e) {
data->qrt[qrt - 1] += data->value[n];
data->qrt[qrt - 1] *= 0.5;
}
}
static void set_qrts(t_data *data)
{
set_qrt(data, 2);
if (data->count > 1) {
set_qrt(data, 1);
set_qrt(data, 3);
} else {
data->qrt[0] = 0.0;
data->qrt[2] = 0.0;
}
}
static int comp(const void *pa, const void *pb)
{
const double a = *(const double *)pa;
const double b = *(const double *)pb;
return (a > b) ? 1 : (a < b) ? -1 : 0;
}
int main(void)
{
double values[] = {3.7, 8.9, 7.1, 5.4, 1.2, 6.8, 4.3, 2.7};
t_data data;
data.value = values;
data.count = (int)(sizeof(values) / sizeof(double));
qsort(data.value, data.count, sizeof(double), comp);
set_qrts(&data);
printf("Q1 = %.1f\nQ2 = %.1f\nQ3 = %.1f\n", data.qrt[0], data.qrt[1], data.qrt[2]);
}
Is
d = qrt * 0.25 * data->count + 0.5;
n = (int)d;
e = n != d;
guaranteed to work as expected? (e == isinteger(d))
Numbers 0.5, 0.25, 0.125 and so on represent negative powers of two, and therefore are representable exactly in IEEE 754 types. Using these numbers does not result in representation errors.
The values 0.5 and 0.25 themselves will be exact. The intermediate values of your calculation may or may not be, depending on their range. IEEE doubles have a 52-bit mantissa, so they will exactly represent to the 0.25 numbers that need 50 bits or fewer in the mantissa, which is about 15 decimal digits.
So if you add 0.25 to 100000000000000 (10^14), you'll get 100000000000000.25. But if you add 0.25 to 10000000000000000 (10^16), you'll lose the fraction.
dasblinkenlight is absolutely correct. Double/float and integer types are stored differently according to IEEE754. Watch this for an easy tutorial if you are curious about it.
The double precision floating point format has 53 bits in its manitissa of which one is implicit. This means that it can represent all positive and negative integers in the range 2^0 to 2^53-1.
0 (zero) is a special case which has its own format.
When it comes to a 0.25 spacing the range is straight-forwardly calculated to be 2^-2 to 2^51-0.25. This means that quite a few but by no means all multiples of 0.25 are exactly representable in the double precision format, just as a quite a few but not all integers are exactly representable.
So if you have an exactly representable spacing of 2^x the representable range is 2^x to 2^(53+x)-2^x.
#include <GL/glut.h>
#include <GL/gl.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define N 200
typedef struct vertex{
float x; // x position of the point
float y; // y position of the point
float r; // red color component of the point
float g; // green color component of the point
float b; // blue color component of the point
char isVisited;
}Vertex;
Vertex *borderLines,*interPolationLines;
int vertex_Count;// total vertex
int counter;//counts matched y coordinates
FILE *f,*g;
void readTotalVertexCount(){
if((f = fopen("vertex.txt","r"))==NULL){
printf("File could not been read\n");
return ;
}
fscanf(f,"%d",&vertex_Count);
/*if((g = fopen("points.txt","w"))==NULL){
return ;
}*/
}
void readVertexCoordinatesFromFile(){
Vertex v[vertex_Count];
borderLines = (Vertex *)calloc(N*vertex_Count,sizeof(Vertex));
interPolationLines = (Vertex *)calloc(N*N*(vertex_Count-1),sizeof(Vertex));
int i = 0;int j;
//read vertexes from file
while(i<vertex_Count){
fscanf(f,"%f",&(v[i].x));
fscanf(f,"%f",&(v[i].y));
fscanf(f,"%f",&(v[i].r));
fscanf(f,"%f",&(v[i].g));
fscanf(f,"%f",&(v[i].b));
//printf("%f %f \n",v[i].x,v[i].y);
i++;
}
Vertex *borderLine,*temp;
float k,landa;
// draw border line actually I am doing 1D Interpolation with coordinates of my vertexes
for (i = 0;i < vertex_Count;i++){
int m = i+1;
if(m==vertex_Count)
m = 0;
borderLine = borderLines + i*N;
for(j = 0;j < N; j++){
k = (float)j/(N - 1);
temp = borderLine + j;
landa = 1-k;
//finding 1D interpolation coord. actually they are borders of my convex polygon
temp->x = v[i].x*landa + v[m].x*k;
temp->y = v[i].y*landa + v[m].y*k;
temp->r = v[i].r*landa + v[m].r*k;
temp->g = v[i].g*landa + v[m].g*k;
temp->b = v[i].b*landa + v[m].b*k;
temp->isVisited = 'n'; // I didn't visit this point yet
//fprintf(g,"%f %f %f %f %f\n",temp->x,temp->y,temp->r,temp->g,temp->b);
}
}
/* here is actual place I am doing 2D Interpolation
I am traversing along the border of the convex polygon and finding the points have the same y coordinates
Between those two points have same y coord. I am doing 1D Interpolation*/
int a;counter = 0;
Vertex *searcherBorder,*wantedBorder,*interPolationLine;
int start = N*(vertex_Count); int finish = N*vertex_Count;
for(i = 0;i< start ;i++){
searcherBorder = i + borderLines;
for(j = i - i%N + N +1; j< finish; j++){
wantedBorder = j + borderLines;
if((searcherBorder->y)==(wantedBorder->y) && searcherBorder->isVisited=='n' && wantedBorder->isVisited=='n'){
//these points have been visited
searcherBorder->isVisited = 'y';
wantedBorder->isVisited = 'y';
interPolationLine = interPolationLines + counter*N;
//counter variable counts the points have same y coordinates.
counter++;
//printf("%d %d %d\n",i,j,counter);
//same as 1D ınterpolation
for(a= 0;a< N;a++){
k = (float)a/(N - 1);
temp = interPolationLine + a;
landa = 1-k;
temp->x = (wantedBorder->x)*landa + (searcherBorder->x)*k;
temp->y = (wantedBorder->y)*landa + (searcherBorder->y)*k;
temp->r = (wantedBorder->r)*landa + (searcherBorder->r)*k;
temp->g = (wantedBorder->g)*landa + (searcherBorder->g)*k;
/*if(temp->x==temp->y)
printf("%f %f \n",wantedBorder->x,searcherBorder->x);*/
temp->b = (wantedBorder->b)*landa + (searcherBorder->b)*k;
}
}
}
}
fclose(f);
}
void display(void){
glClear(GL_COLOR_BUFFER_BIT);
glColor3f(1.0,1.0,1.0);
int i,j;
Vertex *interPol,*temp;
glBegin (GL_POINTS);
for(i = 0;i< counter;i++){
interPol = interPolationLines + i*N;
for(j = 0;j< N;j++){
temp = interPol + j;
glColor3f((temp)->r,(temp)->g,(temp)->b);
//fprintf(g,"%f %f \n",(temp)->x,(temp)->y);
glVertex2f ((temp)->x,(temp)->y);
}
}
//printf("%d\n",counter);
fclose(g);
glEnd ();
glFlush();
}
void init(void){
glutInitDisplayMode( GLUT_RGB | GLUT_SINGLE);
glutInitWindowSize(900,500);
glutInitWindowPosition(200,100);
glutCreateWindow("2D InterPolation");
glClearColor(0.0, 0.0, 0.0, 0.0);
glClear(GL_COLOR_BUFFER_BIT);
glShadeModel(GL_SMOOTH);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);
}
int main(int argc, char** argv)
{
readTotalVertexCount();
readVertexCoordinatesFromFile();
glutInit(&argc,argv);
init();
glutDisplayFunc(display);
glutMainLoop();
return 0;
}
I am implementing 2D Interpolation of a convex polygon and my code does not care about concav.my code works for some convex polygons but for others fail.For those my code fails it does not draw middle of the polygon.it only draws an upper and lower triangle.it reads vertexes from file vertex.txt and its format:x co,y co,red,green,blue color info of that point like below and for the values below my code fails.Thanks for replies in advance.I will get mad.
7
0.9 0.4 1.0 0.0 1.0
0.8 0.2 1.0 0.0 1.0
0.5 0.1 1.0 0.0 0.0
0.3 0.3 0.0 0.0 1.0
0.3 0.35 0.0 0.0 1.0
0.4 0.4 0.0 1.0 0.0
0.6 0.5 1.0 1.0 1.0
Without fully debugging your program, I'm suspicious of the line that says, for(j = i - i%N + N +1; j< finish; j++){. I don't know exactly what you're intending to do, but it just looks suspicious. Furthermore, I would recommend a different algorithm:
Trace around the polygon
Mark any edges that span the desired y-value
Corner cases aside, there's only a solution if you find exactly two hits.
Calculate the intersection of the edges with the y-value
Perform the x-interpolation
Also, concise questions are better than, "Why doesn't my program work?" Forgive me but it feels like a homework problem.
Note: Should this be a comment instead of an answer? I'm new here...