How can I force printf format codes like %f to produce aligned, padded output for both positive and negative numbers? The following program is a minimal yet complete example of my problem:
#include<stdio.h>
#include<stdlib.h>
/*
 * Prints a header and three table rows with one shared set of conversion
 * specifications. The third row carries a negative score, which is where
 * the columns stop lining up.
 */
int main(int argc, char *argv[])
{
    int row = 1;
    float score = 0.1;
    float phi = 0.1;
    float rho = 0.2;

    printf(" # score phi rho\n");

    /* Row 1: the initial values. */
    printf("%2d %2.3f %1.2f %1.2f\n", row, score, phi, rho);

    /* Row 2: every column goes up by one. */
    row += 1;
    score += 1.0f;
    phi += 1.0f;
    rho += 1.0f;
    printf("%2d %2.3f %1.2f %1.2f\n", row, score, phi, rho);

    /* Row 3: score drops below zero; phi and rho keep climbing. */
    row += 1;
    score -= 1.2;
    phi += 1.0f;
    rho += 1.0f;
    printf("%2d %2.3f %1.2f %1.2f\n", row, score, phi, rho);

    return 0;
}
and here is the result:
# score phi rho
1 0.100 0.10 0.20
2 1.100 1.10 1.20
3 -0.100 2.10 2.20
I wanted to have an output like
# score phi rho
1 +0.100 0.10 0.20
2 +1.100 1.10 1.20
3 -0.100 2.10 2.20
or
# score phi rho
1 0.100 0.10 0.20
2 1.100 1.10 1.20
3 -0.10 2.10 2.20
My current (bad) solution is an if-else statement, conditioned on the sign of the score variable, that chooses between two different printf calls for the plus and minus cases.
From the format specification syntax, the printf conversion specification follows
%[flags][width][.precision][size]type
So, if we set the flag to +, a sign (+ or -) is always printed as a prefix of the output value; the space flag instead prints a blank in place of the + for non-negative values. For example,
/* The '+' flag forces an explicit sign: prints "-19.86", then "+19.86" on the next line. */
printf("%+5.2f\n", -19.86);
printf("%+5.2f\n", 19.86);
Related
I am calculating the value of pi (3.14) by finding the area under the curve 4/(1+x*x) between the limits 0 and 1. The following is an MPI program in C for doing so.
When there is only one process, it gives the correct value. However, if I run it with more than one process, only the process with rank 0 produces a meaningful value, while the other processes give 0.0 as their locally computed value.
What error is there in the following code ?
#include<mpi.h>
#include<math.h>
#include<stdio.h>
#define MAX_NAME 80
/*
 * Estimates pi by summing step*4/(1+x*x) over a per-rank run of samples,
 * prints each rank's partial sum, then adds the partial sums onto rank 0
 * with MPI_Reduce and reports the total, its error against PI25DT and the
 * elapsed wall-clock time.
 */
int main(int argc,char *argv[])
{
/* NOTE(review): the C binding is spelled MPI_Init; MPI_INIT is the
 * Fortran-style name - confirm this builds and links as intended. */
MPI_INIT(&argc,&argv);
int rank,nprocs,len;
double i=0.0;
/* Total number of samples across all ranks. */
double n=1000000000.0;
/* Reference value of pi used only for the error report. */
double PI25DT =3.141592653589793238462643;
double mypi,pi,step,sum,x;
/* NOTE(review): MPI_Get_processor_name expects room for
 * MPI_MAX_PROCESSOR_NAME characters; MAX_NAME may be smaller - verify. */
char name[MAX_NAME];
double start_time,end_time,computation_time;
MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Get_processor_name(name,&len);
start_time=MPI_Wtime();
sum=0.0;
/* Width of one sample. */
step=1.0/(double)n;
x=0.0;
/* NOTE(review): rank*(n/nprocs) is a sample *count*, not a position:
 * with n = 1e9 and two ranks, rank 1 starts x at 5e8, where 4/(1+x*x) is
 * effectively zero. Presumably the offset should be scaled by step. */
x=(double)rank*(n/nprocs);
x=x+step;
double temp=x;
/* i only counts iterations (about n/nprocs of them); x is the abscissa
 * that is actually integrated and advances by step each pass. */
for(i=temp;i<(temp+(n/nprocs));i=i+1.0)
{
sum+=step*(4.0/(1.0+(double)(x*x)));
x=x+step;
}
mypi=sum;
printf("\nProcessor: %d Name: %s Sum: %.16f \n",rank,name,mypi);
MPI_Barrier(MPI_COMM_WORLD);
/* Sum every rank's mypi into pi on rank 0. */
MPI_Reduce(&mypi,&pi,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
/* Only rank 0 receives the reduced value, so only it reports. */
if(rank==0)
{
printf("\nValue of Pi is %.16f approximately .Error is %.16f \n",pi,fabs(pi-PI25DT));
/* The timer is stopped after the result has been printed, so the
 * reported time includes that printf. */
end_time=MPI_Wtime();
computation_time=end_time-start_time;
printf("\nComputation time is: %f seconds.\n",computation_time);
}
MPI_Finalize();
/* No explicit return: main falls off the end. */
}
Output of above code for one process and more than one process is as:
OUTPUT
The entire logic in your code seems flawed - you have confused integers and doubles in your "for" loop which means the loop limits are all wrong.
The easiest way to debug simple programs is just to print to the screen - I'm eternally surprised by how reluctant people are to just get their program to tell them what it's actually doing.
If I set n=5.0 and change the loop to:
x=x+step;
/* Debug trace: print x right after each advance, tagged with the rank. */
printf("rank %d has x = %lf\n", rank, x);
then on 2 processes I get:
rank 0 has x = 0.400000
rank 0 has x = 0.600000
rank 0 has x = 0.800000
Processor: 0 Name: starless Sum: 2.0471212357622095
Value of Pi is 2.3040395511642187 approximately .Error is 0.8375531024255745
Computation time is: 0.000067 seconds.
rank 1 has x = 2.900000
rank 1 has x = 3.100000
rank 1 has x = 3.300000
Processor: 1 Name: starless Sum: 0.2569183154020093
showing the loop limits are wrong.
I don't think they're even correct on a single process:
rank 0 has x = 0.400000
rank 0 has x = 0.600000
rank 0 has x = 0.800000
rank 0 has x = 1.000000
rank 0 has x = 1.200000
Surely the integral is between 0.0 and 1.0?
Regards,
David
I understand that operations across SIMD lanes should generally be avoided.
However, sometimes it has to be done.
I am using AVX2 intrinsics, and have 8 floating point values in an __m256.
I want to know the lowest value in this vector, and to complicate matters: also in which slot this was.
My current solution makes a round trip to memory, which I don't like:
float closestvals[8];
_mm256_store_ps( closestvals, closest8 );
float closest = closestvals[0];
int closestidx = 0;
for ( int k=1; k<8; ++k )
{
if ( closestvals[k] < closest )
{
closest = closestvals[ k ];
closestidx = k;
}
}
What would be a good way to do this without going to/from memory?
You can try this:
#include <stdio.h>
#include <x86intrin.h>
#include <math.h>
/* gcc -O3 -Wall -m64 -march=haswell hor_min.c */
int print_vec_ps(__m256 x);
/*
 * Demo: horizontal minimum of the 8 floats in an AVX register, plus the
 * index of the first lane holding that minimum, computed without a round
 * trip through memory. Every intermediate vector is printed.
 */
int main(void) {
    float x[8]={1.2f, 3.6f, 2.1f, 9.4f, 4.0f, 0.1f, 8.9f, 3.3f};
    /* Note that the results are not useful if one of the inputs is a 'not a number'. The input below leads to indx = 32 (!) */
    // float x[8]={1.2f, 3.6f, 2.1f, NAN, 4.0f, 2.0f , 8.9f, 3.3f};

    /* x is an ordinary stack array with no 32-byte alignment guarantee, so
     * the unaligned load is required; _mm256_load_ps could fault here. */
    __m256 v0 = _mm256_loadu_ps(x);

    /* _mm256_shuffle_ps instead of _mm256_permute_ps is also possible, see Peter Cordes' comments */
    /* _MM_SHUFFLE(2,3,0,1) == 0xB1: swap floats 0<->1, 2<->3, 4<->5, 6<->7 */
    __m256 v1 = _mm256_permute_ps(v0, _MM_SHUFFLE(2,3,0,1));
    __m256 v2 = _mm256_min_ps(v0,v1);

    /* _MM_SHUFFLE(1,0,3,2) == 0x4E: swap float pairs (0,1)<->(2,3) and (4,5)<->(6,7) */
    __m256 v3 = _mm256_permute_ps(v2, _MM_SHUFFLE(1,0,3,2));
    __m256 v4 = _mm256_min_ps(v2,v3);

    /* Same immediate applied to 64-bit elements: swap the two 128-bit lanes */
    __m256 v5 = _mm256_castpd_ps(_mm256_permute4x64_pd(_mm256_castps_pd(v4), _MM_SHUFFLE(1,0,3,2)));
    __m256 v_min = _mm256_min_ps(v4,v5);

    /* All-ones in every lane of v0 that equals the minimum (ordered, quiet compare). */
    __m256 mask = _mm256_cmp_ps(v0, v_min, _CMP_EQ_OQ);
    /* One bit per lane; the count of trailing zeros is the first matching
     * lane, and it is 32 when no lane matched. */
    int indx = _tzcnt_u32(_mm256_movemask_ps(mask));

    printf(" 7 6 5 4 3 2 1 0\n");
    printf("v0 = ");print_vec_ps(v0 );
    printf("v1 = ");print_vec_ps(v1 );
    printf("v2 = ");print_vec_ps(v2 );
    printf("\nv3 = ");print_vec_ps(v3 );
    printf("v4 = ");print_vec_ps(v4 );
    printf("\nv5 = ");print_vec_ps(v5 );
    printf("v_min = ");print_vec_ps(v_min );
    printf("mask = ");print_vec_ps(mask );
    printf("indx = %d\n", indx);
    return 0;
}
/*
 * Print the 8 floats of x on one line, highest lane (7) first and lane 0
 * last, each formatted as %5.2f and separated by single spaces.
 * Always returns 0.
 */
int print_vec_ps(__m256 x){
    float lanes[8];
    _mm256_storeu_ps(lanes, x);
    for (int lane = 7; lane >= 0; --lane) {
        /* A space follows every value except the last, which ends the line. */
        printf("%5.2f%c", lanes[lane], lane > 0 ? ' ' : '\n');
    }
    return 0;
}
Output:
./a.out
7 6 5 4 3 2 1 0
v0 = 3.30 8.90 0.10 4.00 9.40 2.10 3.60 1.20
v1 = 8.90 3.30 4.00 0.10 2.10 9.40 1.20 3.60
v2 = 3.30 3.30 0.10 0.10 2.10 2.10 1.20 1.20
v3 = 0.10 0.10 3.30 3.30 1.20 1.20 2.10 2.10
v4 = 0.10 0.10 0.10 0.10 1.20 1.20 1.20 1.20
v5 = 1.20 1.20 1.20 1.20 0.10 0.10 0.10 0.10
v_min = 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10
mask = 0.00 0.00 -nan 0.00 0.00 0.00 0.00 0.00
indx = 5
In the previous version of this answer, the 128-bit lanes were swapped with _mm256_permute2f128_ps.
In this updated answer _mm256_permute2f128_ps is replaced by _mm256_permute4x64_pd,
which is faster on AMD CPUs and on Intel KNL, see @Peter Cordes' comments.
But note that _mm256_permute4x64_pd requires AVX2, while AVX is sufficient for _mm256_permute2f128_ps.
Also note that the results of this code are useless if one of the input values is a 'not a number' (NAN).
/* Pick a pseudo-random battery level: rand()/RAND_MAX scaled by 5.0. */
float batt = ((float)rand()/(float)(RAND_MAX)) * 5.0;
/* demo_printf is defined elsewhere; it is called with a printf-style format
 * string - presumably printf-compatible, confirm against its definition. */
demo_printf("Battery:%.2f\n", batt);
The line above generates a random number for batt. How do I instead make it decrease bit by bit in a loop, maybe by 0.1 each time?
I cannot put the demo_printf call inside a for loop; I can only loop elsewhere.
Huh?
This question is very strange. Do you mean something like this:
float batt = 5.f * rand() / RAND_MAX;
while(batt > 0.f)
{
printf("Battery is %.1f\n", batt);
batt -= 0.1f;
}
Sample output:
Battery is 4.2
Battery is 4.1
Battery is 4.0
Battery is 3.9
Battery is 3.8
Battery is 3.7
Battery is 3.6
Battery is 3.5
Battery is 3.4
[ ... more like this ...]
Battery is 1.0
Battery is 0.9
Battery is 0.8
Battery is 0.7
Battery is 0.6
Battery is 0.5
Battery is 0.4
Battery is 0.3
Battery is 0.2
Battery is 0.1
Battery is 0.0
// obtain initial random float
float batt = ((float)rand()/(float)(RAND_MAX)) * 5.0;
// iterate beginning with this random float downwards by steps of 0.1f
for (float x = batt; x > 0; x -= 0.1) {
demo_printf("Battery:%.2f\n", x);
}
Use 2 variables, target and current:
float current = 5.0f;
float target = 2.5f;
while(current > target) {
// print here
current -= 0.1f;
}
current = target;
// final print here
Just be aware that conversion of float literal constant 0.1f to binary float will never be perfectly accurate, and the decrement will not be exactly 0.1. But fixing that is for another question.
This is the output I expect
x |x|
1.2 1.2
-2.3 2.3
3.4 3.4
but I keep getting this:
x |x|
1.2 1.2
-2.3 2.3
3.4 3.4
Here is my part of code:
/* Print a two-column table: array[i] next to array1[i] (per the header,
 * array1 presumably holds the absolute values - confirm). */
printf(" x |x|\n");
/* NOTE(review): indices run 1..n inclusive, so both arrays need at least
 * n+1 elements - check against their declarations, which are not shown. */
for (i = 1; i <= n; i++)
{
/* %.1f carries no field width, so a leading '-' makes that row one
 * character wider than the others and the columns drift. */
printf(" %.1f %.1f\n", array[i], array1[i]);
}
how do I change it?
You should specify a field width before the dot in %.1f that is large enough for the whole number, including the '-' sign. For example:
/* NOTE(review): the field width is a minimum and counts the sign and the
 * '.', so "-2.3" needs 4 characters; with a width of 3 the negative row
 * still comes out one character wider than "1.2". */
printf("%3.1f",array[i]);
Will result in :
x |x|
1.2 1.2
-2.3 2.3
3.4 3.4
It means that the number should occupy at least 3 character positions on screen (this minimum width counts the sign and the decimal point too).
Sorry for bad English.
I'm trying to reproduce some code from R in C, so I'm trying to fit a linear regression using the gsl_fit_linear() function.
In R I'd use the lm() function, which returns a p-value for the fit using this code:
lmAvgs<- lm( c(1.23, 11.432, 14.653, 21.6534) ~ c(1970, 1980, 1990, 2000) )
summary(lmAvgs)
I've no idea though how to go from the C output to a p-value, my code looks something like this so far:
/*
 * Fit the straight line y = c0 + c1*x to four (x, y) points with GSL.
 * gsl_fit_linear fills in the intercept c0, the slope c1, the covariance
 * terms cov00/cov01/cov11 of those two estimates, and sumsq, the sum of
 * squared residuals of the fit.
 */
int main(void)
{
    int n = 4;
    double x[4] = { 1970, 1980, 1990, 2000 };
    double y[4] = {1.23, 11.432, 14.653, 21.6534};
    double c0, c1, cov00, cov01, cov11, sumsq;

    /* Stride 1 for both arrays: every element is used. */
    gsl_fit_linear (x, 1, y, 1, n, &c0, &c1, &cov00, &cov01, &cov11, &sumsq);
    return 0;
}
This seems to correctly calculate the slope and intercept, but I don't know how to get a p-value. I'm a novice at stats and C!
Everything is explained at http://en.wikipedia.org/wiki/Ordinary_least_squares, but here is a piece of code which displays output similar to summary(lmAvgs) in R. To run it, you need the GSL library:
int n = 4;
double x[4] = { 1970, 1980, 1990, 2000};
double y[4] = {1.23, 11.432, 14.653, 21.6534};
double c0, c1, cov00, cov01, cov11, sumsq;
gsl_fit_linear (x, 1, y, 1, n, &c0, &c1, &cov00, &cov01, &cov11, &sumsq);
cout<<"Coefficients\tEstimate\tStd. Error\tt value\tPr(>|t|)"<<endl;
double stdev0=sqrt(cov00);
double t0=c0/stdev0;
double pv0=t0<0?2*(1-gsl_cdf_tdist_P(-t0,n-2)):2*(1-gsl_cdf_tdist_P(t0,n-2));//This is the p-value of the constant term
cout<<"Intercept\t"<<c0<<"\t"<<stdev0<<"\t"<<t0<<"\t"<<pv0<<endl;
double stdev1=sqrt(cov11);
double t1=c1/stdev1;
double pv1=t1<0?2*(1-gsl_cdf_tdist_P(-t1,n-2)):2*(1-gsl_cdf_tdist_P(t1,n-2));//This is the p-value of the linear term
cout<<"x\t"<<c1<<"\t"<<stdev1<<"\t"<<t1<<"\t"<<pv1<<endl;
double dl=n-2;//degrees of liberty
double ym=0.25*(y[0]+y[1]+y[2]+y[3]); //Average of vector y
double sct=pow(y[0]-ym,2)+pow(y[1]-ym,2)+pow(y[2]-ym,2)+pow(y[3]-ym,2); // sct = sum of total squares
double R2=1-sumsq/sct;
cout<<"Multiple R-squared: "<<R2<<", Adjusted R-squared: "<<1-double(n-1)/dl*(1-R2)<<endl;
double F=R2*dl/(1-R2);
double p_value=1-gsl_cdf_fdist_P(F,1,dl);
cout<<"F-statistic: "<<F<<" on 1 and "<<n-2<<" DF, p-value: "<<p_value<<endl;
Which gives :
Coefficients Estimate Std. Error t value Pr(>|t|)
Intercept -1267.91 181.409 -6.98922 0.0198633
x 0.644912 0.0913886 7.05681 0.0194956
Multiple R-squared: 0.961389, Adjusted R-squared: 0.942083
F-statistic: 49.7986 on 1 and 2 DF, p-value: 0.0194956
R gives :
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.268e+03 1.814e+02 -6.989 0.0199 *
c(1970, 1980, 1990, 2000) 6.449e-01 9.139e-02 7.057 0.0195 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.044 on 2 degrees of freedom
Multiple R-squared: 0.9614, Adjusted R-squared: 0.9421
F-statistic: 49.8 on 1 and 2 DF, p-value: 0.01950