In a 1 dimensional space:
x = i
In a 2 dimensional space (of size sx, sy):
x = i / sx
y = i % sx
In a 3 dimensional space (of size sx, sy, sz):
x = i / (sy*sz)
y = (i/sz) % sy
z = i % sz
How to deal with an N dimensional space? How can these formulas be generalized?
What about the inverse conversion?
(x1, x2, ..., xn) --> i
Note: all variables are integer.
I guess the common formulas looks like:
to check this formulas i used this programs and it looks like works.
#include <iostream>
#include <string>
#include <string.h>
#define Sz1 2
#define Sz2 3
#define Sz3 4
#define Sz4 5
using namespace std;
int main()
{
int a[Sz4][Sz3][Sz2][Sz1];
int i,j,k,l,n,x1,x2,x3,x4,s1,s2,s3,s4;
s1=Sz1;s2=Sz2;s3=Sz3,s4=Sz4;
memset(a,0,sizeof(a));
for (i=0;i<s1*s2*s3*s4;i++){
x4= i/(s1*s2*s3);
x3 = i / (s1*s2) % s3;
x2 = (i/s1) % s2;
x1 = i % s1;
a[x4][x3][x2][x1]=i;
}
for (l=0;l<Sz4;l++) {
for (i=0;i<Sz3;i++) {
for(j=0;j<Sz2;j++) {
for(k=0;k<Sz1;k++) {
n=s1*s2*s3*l+s1*s2*i+s1*j+k;//inverse convertion
cout<<a[l][i][j][k]<<"{"<<n<<"} ";
}
cout<<"|";
}
cout<<endl;
}
cout<<endl<<endl;
}
return 0;
}
Related
I am trying to do something like this...
#define INPUT (x = 3, y = 5)
#define MATH(add) ((add == 1) ? (INPUT.x + INPUT.y) : (INPUT.x - INPUT.y))
void main (void)
{
int add = MATH (1);
int subs = MATH (0);
}
Basically I want to simplify this...
#define x 3
#define y 5
#define MATH(add) ((add == 1) ? (x + y) : (x - y))
void main (void)
{
int add = MATH (1);
int subs = MATH (0);
}
Is this possible in C somehow?
Since you want to do cpp drugs so badly, here is some food for thought:
#include <stdio.h>
#define x 3
#define y 5
#define x1 +
#define x0 -
#define MATH(a) x x##a y
int main(void) {
int add = MATH(1);
int subs = MATH(0);
printf("add=%d, sub=%d\n", add, subs);
return 0;
}
Say I have a high floating point number... 1345.23
I want to reduce it by 2*PI until it stays between -PI and +PI so I'd do:
#define PI 3.14159265358f
#define TWO_PI 6.28318530718f
float a = 1345.23f;
while (a > PI) a -= TWO_PI;
Do you know a fastest method?
With this code you will enter in the loop just 1 time (you can delate it adding just a more a -= TWO_PI
#include <stdio.h>
#define PI 3.14159265358f
#define TWO_PI 6.28318530718f
int main(void) {
float a = 1345.23f;
float b = 1345.23 - PI;
int c = b/TWO_PI;
a -= c*TWO_PI;
int i = 0;
while (a > PI){
a -= TWO_PI;
printf("%d",i++);
}
printf("\na : %f",a);
}
OUTPUT:
0
a : 0.628314
While your code will do the cicle :
214 times
BETTER CODE:
#include <stdio.h>
#define PI 3.14159265358f
#define TWO_PI 6.28318530718f
#define INV_TWO_PI 0.15915494309189533
int main(void) {
double a = 1345.23;
if(a > PI){
double b = a - PI; // you get the distance between a and PI
// int c = b/TWO_PI; // you get the integer part
int c = b * INV_TWO_PI; // the same as above using multiplication
a -= (c+1)*TWO_PI; // you just subtract c+1 times TWO_PI
// c+1 cause you want come in the range [-PI,PI]
}
}
Not the fastest, but the shortest code:
y = asin(sin(a));
Assuming that your code has to do with phase wrapping in radians, so that values between PI and TWO_PI can map between -PI and 0.0 a simple and fast solution would be:
double a = 1345.23;
double b = TWO_PI;
double c = (fmod(a,b) > PI ? fmod(a,b) - b : fmod(a,b));
After accept answer
To quickly reduce between -PI and PI, simply use remquof();
#include <math.h>
#include <stdio.h>
float reduce_radian(float x) {
static const float pi2 = 6.283185307179586476925286766559f;
int n;
return remquof(x, pi2, &n);
}
int main(void) {
printf("x % .10e\ty % .10e\n", 1e-30, reduce_radian(1e-30));
for (float x = 0.0f; x <= 4.0f; x += 1.0f) {
printf("x % .10f\ty % .10f\n", -x, reduce_radian(-x));
printf("x % .10f\ty % .10f\n", x, reduce_radian(x));
}
}
Output
x 1.0000000000e-30 y 1.0000000032e-30
x -0.0000000000 y -0.0000000000
x 0.0000000000 y 0.0000000000
x -1.0000000000 y -1.0000000000
x 1.0000000000 y 1.0000000000
x -2.0000000000 y -2.0000000000
x 2.0000000000 y 2.0000000000
x -3.0000000000 y -3.0000000000
x 3.0000000000 y 3.0000000000
x -4.0000000000 y 2.2831854820
x 4.0000000000 y -2.2831854820
To understand why this is not the best precise answer, is a deep subject.
See K.C. Ng's "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit"
I currently have to run a nested loop as follow:
for(int i = 0; i < N; i++){
for(int j = i+1; j <= N; j++){
compute(...)//some calculation here
}
}
I've tried leaving the first loop in CPU and do the second loop in GPU. Results are too many memory access. Is there any other ways to do it? For example by thrust::reduce_by_key?
The whole program is here:
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/generate.h>
#include <thrust/sort.h>
#include <thrust/binary_search.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/random.h>
#include <cmath>
#include <iostream>
#include <iomanip>
#define N 1000000
// define a 2d point pair
typedef thrust::tuple<float, float> Point;
// return a random Point in [0,1)^2
Point make_point(void)
{
static thrust::default_random_engine rng(12345);
static thrust::uniform_real_distribution<float> dist(0.0f, 1.0f);
float x = dist(rng);
float y = dist(rng);
return Point(x,y);
}
struct sqrt_dis: public thrust::unary_function<Point, double>
{
float x, y;
double tmp;
sqrt_dis(float _x, float _y): x(_x), y(_y){}
__host__ __device__
float operator()(Point a)
{
tmp =(thrust::get<0>(a)-x)*(thrust::get<0>(a)-x)+\
(thrust::get<1>(a)-y)*(thrust::get<1>(a)-y);
tmp = -1.0*(sqrt(tmp));
return (1.0/tmp);
}
};
int main(void) {
clock_t t1, t2;
double result;
t1 = clock();
// allocate some random points in the unit square on the host
thrust::host_vector<Point> h_points(N);
thrust::generate(h_points.begin(), h_points.end(), make_point);
// transfer to device
thrust::device_vector<Point> points = h_points;
thrust::plus<double> binary_op;
float init = 0;
for(int i = 0; i < N; i++){
Point tmp_i = points[i];
float x = thrust::get<0>(tmp_i);
float y = thrust::get<1>(tmp_i);
result += thrust::transform_reduce(points.begin()+i,\
points.end(),sqrt_dis(x,y),\
init,binary_op);
std::cout<<"result"<<i<<": "<<result<<std::endl;
}
t2 = clock()-t1;
std::cout<<"result: ";
std::cout.precision(10);
std::cout<< result <<std::endl;
std::cout<<"run time: "<<t2/CLOCKS_PER_SEC<<"s"<<std::endl;
return 0;
}
EDIT: Now that you have posted an example, here is how you could solve it:
You have n 2D points stored in a linear array like this (here n=4)
points = [p0 p1 p2 p3]
Based on your code I assume you want to calculate:
result = f(p0, p1) + f(p0, p2) + f(p0, p3) +
f(p1, p2) + f(p1, p3) +
f(p2, p3)
Where f() is your distance function which needs to be executed m times in total:
m = (n-1)*n/2
in this example: m=6
You can look at this problem as a triangular matrix:
[ p0 p1 p2 p3 ]
[ p1 p2 p3 ]
[ p2 p3 ]
[ p3 ]
Transforming this matrix into a linear vector with m elements while leaving out the diagonal elements results in:
[p1 p2 p3 p2 p3 p3]
The index of an element in the vector is k = [0,m-1].
Index k can be remapped to columns and rows of the triangular matrix to k -> (i,j):
i = n - 2 - floor(sqrt(-8*k + 4*n*(n-1)-7)/2.0 - 0.5)
j = k + i + 1 - n*(n-1)/2 + (n-i)*((n-i)-1)/2
i is the row and j is the column.
In our example:
0 -> (0, 1)
1 -> (0, 2)
2 -> (0, 3)
3 -> (1, 2)
4 -> (1, 3)
5 -> (2, 3)
Now you can put all this together and execute a modified distance functor m times which applies the aforementioned mapping to get the corresponding pairs based on the index and then sum up everything.
I modified your code accordingly:
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform_reduce.h>
#include <thrust/random.h>
#include <math.h>
#include <iostream>
#include <stdio.h>
#include <stdint.h>
#define PRINT_DEBUG
typedef float Float;
// define a 2d point pair
typedef thrust::tuple<Float, Float> Point;
// return a random Point in [0,1)^2
Point make_point(void)
{
static thrust::default_random_engine rng(12345);
static thrust::uniform_real_distribution<Float> dist(0.0, 1.0);
Float x = dist(rng);
Float y = dist(rng);
return Point(x,y);
}
struct sqrt_dis_new
{
typedef thrust::device_ptr<Point> DevPtr;
DevPtr points;
const uint64_t n;
__host__
sqrt_dis_new(uint64_t n, DevPtr p) : n(n), points(p)
{
}
__device__
Float operator()(uint64_t k) const
{
// calculate indices in triangular matrix
const uint64_t i = n - 2 - floor(sqrt((double)(-8*k + 4*n*(n-1)-7))/2.0 - 0.5);
const uint64_t j = k + i + 1 - n*(n-1)/2 + (n-i)*((n-i)-1)/2;
#ifdef PRINT_DEBUG
printf("%llu -> (%llu, %llu)\n", k,i,j);
#endif
const Point& p1 = *(points.get()+j);
const Point& p2 = *(points.get()+i);
const Float xm = thrust::get<0>(p1)-thrust::get<0>(p2);
const Float ym = thrust::get<1>(p1)-thrust::get<1>(p2);
return 1.0/(-1.0 * sqrt(xm*xm + ym*ym));
}
};
int main()
{
const uint64_t N = 4;
// allocate some random points in the unit square on the host
thrust::host_vector<Point> h_points(N);
thrust::generate(h_points.begin(), h_points.end(), make_point);
// transfer to device
thrust::device_vector<Point> d_points = h_points;
const uint64_t count = (N-1)*N/2;
std::cout << count << std::endl;
thrust::plus<Float> binary_op;
const Float init = 0.0;
Float result = thrust::transform_reduce(thrust::make_counting_iterator((uint64_t)0),
thrust::make_counting_iterator(count),
sqrt_dis_new(N, d_points.data()),
init,
binary_op);
std::cout.precision(10);
std::cout<<"result: " << result << std::endl;
return 0;
}
It depends on your compute function which you do not specify.
Usually you unroll the loops and launch the kernel in a 2D manner for every combination of i and j if the computations are independent.
Have a look at the Thrust examples and identify similar use cases to your problem.
So I have to make this formula "y = y / (3/17) - z + x / (a % 2) + PI" in C
I am having a problem with (a%2) as it is returning odd values. ie 1%2 = 0.000001
int assignment7()
{
#define PI 3.14
int a=0,amod2;
double Ny=0,y=0,z=0,x=0;
printf("Enter values for x,y,z and a: ");
scanf("%d%lf%lf%lf",&a,&y,&z,&x);
//printf("%d,%lf,%lf,%lf\n",a,y,z,x);
//amod2=1%2;
//printf("%lf",amod2);
Ny=y/(double)(3/17) - z+x / amod2 + PI;
printf("%lf\n",Ny);
When you say:
printf("%lf",amod2);
the compiler expects amod2 to be a "long float" (aka a double), but you defined it as:
int amod2;
Also your prompt says "x,y,z and a" but you read in the order "a,y,z,x":
printf("Enter values for x,y,z and a: ");
scanf("%d%lf%lf%lf",&a,&y,&z,&x);
that's awkward at best.
EDIT: cleaned up a bit and made some assumptions about order of operations:
#include <stdio.h>
#define PI 3.14
#define DIVSOR (3.0/17.0)
int assignment7 ( void );
int assignment7 ( void ) {
double x = 0.0;
double y = 0.0;
double z = 0.0;
int a = 0;
int amod2;
double Ny;
printf("Enter values for x,y,z and a: ");
scanf("%lf%lf%lf%d",&x,&y,&z,&a);
amod2 = a % 2;
Ny = (y / DIVSOR) - z + (x / amod2) + PI;
printf("%lf\n", Ny);
return 0;
}
int main ( void ) { return assignment7(); }
You don't say what inputs you are giving it, (a test case with inputs and the expected results would be super helpful), but I can point out that x / (a % 2) is going to be infinity when a is 2 or 4 or 6 or ...
I need to write my own asin() function without math.h library with the use of Taylor series. It works fine for numbers between <-0.98;0.98> but when I am close to limits it stops with 1604 iterations and therefore is inaccurate.
I don't know how to make it more accurete. Any suggestions are very appreciated!
The code is following:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define EPS 0.000000000001
double my_arcsin(double x)
{
long double a, an, b, bn;
a = an = 1.0;
b = bn = 2.0;
long double n = 3.0;
double xn;
double xs = x;
double xp = x;
int iterace = 0;
xn = xs + (a/b) * (my_pow(xp,n) / n);
while (my_abs(xn - xs) >= EPS)
{
n += 2.0;
an += 2.0;
bn += 2.0;
a = a * an;
b = b * bn;
xs = xn;
xn = xs + (a/b) * (my_pow(xp,n) / n);
iterace++;
}
//printf("%d\n", iterace);
return xn;
}
int main(int argc, char* argv[])
{
double x = 0.0;
if (argc > 2)
x = strtod(argv[2], NULL);
if (strcmp(argv[1], "--asin") == 0)
{
if (x < -1 || x > 1)
printf("nan\n");
else
{
printf("%.10e\n", my_arcsin(x));
//printf("%.10e\n", asin(x));
}
return 0;
}
}
And also a short list of my values and expected ones:
My values Expected values my_asin(x)
5.2359877560e-01 5.2359877560e-01 0.5
1.5567132089e+00 1.5707963268e+00 1 //problem
1.4292568534e+00 1.4292568535e+00 0.99 //problem
1.1197695150e+00 1.1197695150e+00 0.9
1.2532358975e+00 1.2532358975e+00 0.95
Even though the convergence radius of the series expansion you are using is 1, therefore the series will eventually converge for -1 < x < 1, convergence is indeed painfully slow close to the limits of this interval. The solution is to somehow avoid these parts of the interval.
I suggest that you
use your original algorithm for |x| <= 1/sqrt(2),
use the identity arcsin(x) = pi/2 - arcsin(sqrt(1-x^2)) for 1/sqrt(2) < x <= 1.0,
use the identity arcsin(x) = -pi/2 + arcsin(sqrt(1-x^2)) for -1.0 <= x < -1/sqrt(2).
This way you can transform your input x into [-1/sqrt(2),1/sqrt(2)], where convergence is relatively fast.
PLEASE NOTICE: In this case I strongly recommend #Bence's method, since you can't expect a slowly convergent method with low data accuracy to obtain arbitrary precision.
However I'm willing to show you how to improve the result using your current algorithm.
The main problem is that a and b grows too fast and soon become inf (after merely about 150 iterations). Another similar problem is my_pow(xp,n) grows fast when n grows, however this doesn't matter much in this very case since we could assume the input data goes inside the range of [-1, 1].
So I've just changed the method you deal with a/b by introducing ab_ratio, see my edited code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define EPS 0.000000000001
#include <math.h>
#define my_pow powl
#define my_abs fabsl
double my_arcsin(double x)
{
#if 0
long double a, an, b, bn;
a = an = 1.0;
b = bn = 2.0;
#endif
unsigned long _n = 0;
long double ab_ratio = 0.5;
long double n = 3.0;
long double xn;
long double xs = x;
long double xp = x;
int iterace = 0;
xn = xs + ab_ratio * (my_pow(xp,n) / n);
long double step = EPS;
#if 0
while (my_abs(step) >= EPS)
#else
while (1) /* manually stop it */
#endif
{
n += 2.0;
#if 0
an += 2.0;
bn += 2.0;
a = a * an;
b = b * bn;
#endif
_n += 1;
ab_ratio *= (1.0 + 2.0 * _n) / (2.0 + 2.0 * _n);
xs = xn;
step = ab_ratio * (my_pow(xp,n) / n);
xn = xs + step;
iterace++;
if (_n % 10000000 == 0)
printf("%lu %.10g %g %g %g %g\n", _n, (double)xn, (double)ab_ratio, (double)step, (double)xn, (double)my_pow(xp, n));
}
//printf("%d\n", iterace);
return xn;
}
int main(int argc, char* argv[])
{
double x = 0.0;
if (argc > 2)
x = strtod(argv[2], NULL);
if (strcmp(argv[1], "--asin") == 0)
{
if (x < -1 || x > 1)
printf("nan\n");
else
{
printf("%.10e\n", my_arcsin(x));
//printf("%.10e\n", asin(x));
}
return 0;
}
}
For 0.99 (and even 0.9999999) it soon gives correct results with more than 10 significant digits. However it gets slow when getting near to 1.
Actually the process has been running for nearly 12 minutes on my laptop calculating --asin 1, and the current result is 1.570786871 after 3560000000 iterations.
UPDATED: It's been 1h51min now and the result 1.570792915 and iteration count is 27340000000.