so i want to calculate angle and area of a triangle, but i need to assign the value of input and output using procedure. i cant find any examples about this and already tried some variation, but still have problem with using the pointer.
what i got so far
#include <stdio.h>
#include <math.h>
#define pi 3.141592654
// Declaration
void input(void);//get user input for triangle's sides
void calculate(int* x,int* y,int* z);//calculating area and angle
//main program
int main(void){
int x,y,z;
double a,b,c,height;
input(x,y,z);
calculate(x,y,z);
printf("angle a : %.3f degree\n",a);
printf("angle b : %.3f degree\n",b);
printf("angle c : %.3f degree\n",c);
printf("Area : %.3f cm2\n",height);
return 0;
}
//Definition
void input(int* x, int* y, int* z)
{
printf("insert side x :\n");
scanf("%d",*&x);
printf("insert side y :\n");
scanf("%d",*&y);
printf("insert side z :\n");
scanf("%d",*&z);
}
void calculate(int* x,int* y,int* z)
{
int s
s=(*x + *y + *z)*0.5;
*Area=sqrt(s*(s-x)*(s-y)*(s-z));
*a=acos(((*x * *x)+(*z * *z)-(*y * *y))/2(*x)(*z));
*b=acos(((*y * *y)+(*z * *z)-(*x * *x))/2(*y)(*z));
*c=acos(((*x * *x)+(*y * *y)-(*z * *z))/2(*x)(*y));
}
i got error in scan user input for x,y,z and assign degree and area result to area,a,b,c
Create needed arguments in both declaration and definition.
I don't think using pointers where they are not needed is good.
Your code should be like this:
#include <stdio.h>
#include <math.h>
#define pi 3.141592654
// Declaration
void input(int* x,int* y,int* z);
void calculate(double* a, double* b, double* c, double* Area, int x,int y,int z);
//main program
int main(void){
int x,y,z;
double a,b,c,height; /* It maybe good to rename height to Area */
input(&x,&y,&z);
calculate(&a,&b,&c,&height,x,y,z);
printf("angle a : %.3f degree\n",a);
printf("angle b : %.3f degree\n",b);
printf("angle c : %.3f degree\n",c);
printf("Area : %.3f cm2\n",height);
return 0;
}
//Definition
void input(int* x, int* y, int* z)
{
printf("insert side x :\n");
scanf("%d",x);
printf("insert side y :\n");
scanf("%d",y);
printf("insert side z :\n");
scanf("%d",z);
}
void calculate(double* a, double* b, double* c, double* Area, int x,int y,int z)
{
double s; /* type of s should be double, not int in this case */
s=(x + y + z)*0.5;
*Area=sqrt(s*(s-x)*(s-y)*(s-z));
*a=acos(((x * x)+(z * z)-(y * y))/(2 * x * z));
*b=acos(((y * y)+(z * z)-(x * x))/(2 * y * z));
*c=acos(((x * x)+(y * y)-(z * z))/(2 * x * y));
}
Related
Header file: circlehead.h
#include <stdio.h>
void circle_Data(float *r);
#define PI 3.14f
C FILE1: circle.c
#include "circlehead.h"
void circle_Data(float *r)
{
float ar=0,peri=0;
ar= PI * (*r) * (*r);
peri=2 * PI * (*r);
}
MAIN FUNCTION circle_main.c
#include<stdio.h>
#include "circlehead.h"
int main()
{
float r=5.24;
float ar, peri;
circle_Data(&r);
printf("Area is %f", ar);
printf("Perimeter is %f", peri);
}
I have linked the files into a single executable:
gcc -c circle.c
gcc -c circle_main.c
gcc -o x_exe circle.o circle_main.o
./x_exe
But I am getting the output as area: 3.728 and perimeter: 0.000
The code was compiled successfully. What am I doing wrong?
You had the right idea (passing a pointer) but you used it in the wrong way.
The problem was that you passed a pointer to the variable that wouldn't be changed, and didn't pass pointers to the variables that you did need to be changed, by circle_Data(). This version ought to behave in the way you wanted. The values of ar and peri that are local to main() are modified by circle_Data(), and the correct values can then be printed.
Note that circle_Data() gets a copy of r that it can modify, but that won't change the value of r in main(). It's a totally separate variable, just as ar and peri were in your first version.
#include "circlehead.h"
void circle_Data(float r, float* ar, float* peri )
{
*ar= PI * r * r;
*peri=2 * PI * r;
}
#include<stdio.h>
#include "circlehead.h"
int main()
{
float r=5.24;
float ar, peri;
circle_Data(r, &ar, &peri);
printf("Area is %f", ar);
printf("Perimeter is %f", peri);
}
You never assign ar or peri any values in main, so those variables don't every get assigned any values. That different variables with the same names get assigned elsewhere doesn't matter. (And the language would be pretty much unusable if it did.)
You could do something like the below. The problem is caused by the fact that you never pass in ar or peri by reference. So you code does not change them.
Main
#include<stdio.h>
#include "circlehead.h"
int main()
{
float r = 5.24;
float ar = 0;
float peri = 0;
circle_Data(r, ar, peri);
printf("Area is %f", ar);
printf("Perimeter is %f", peri);
return 0;
}
Header
#include <stdio.h>
void circle_Data(float r, float &ar, float &peri);
#define PI 3.14f
Body
#include "circlehead.h"
void circle_Data(float r, float &ar, float &peri)
{
ar = PI * (r) * (r);
peri = 2 * PI * (r);
}
Error in my code
Run Time Check Failure #3 - T
I tried many times to fix it,
but I failed.
I added pointer to x, y,
but "Run Time Check Failure #3 - T" — same error.
Can you help me to fix this error?
#include<stdio.h>
#include<math.h>
typedef struct {
double x, y;
}location;
double dist(location a,location b)
{
return sqrt(pow(b.x - a.x, 2.0) + pow(b.y -a.y, 2.0));
}
void func(location l, location e)
{
double z;
location a = l;
location b = e;
printf("enter two dots:");
scanf("%lf %lf", a.x, a.y);
printf("enter two dots:");
scanf("%1",a, b);
printf("%.2lf", z);
}
void main()
{
location l;
location e;
func(l, e);
}
The problems in the code were these:
1) scanf variable args must be passed as pointers. See scanf changes below.
2) initialise your variables in struct - thats the Run Time Check Failure #3 warning. see location initialisation below.
I also simplified a little. Hope that helps.
#include<stdio.h>
#include<math.h>
typedef struct {
double x, y;
}location;
double dist(location a, location b)
{
return sqrt((b.x - a.x) * (b.x - a.x) + (b.y - a.y) * (b.y - a.y));
}
void main()
{
location start = { 0 };
location end = { 0 };
printf("Enter start x, y co-ordinates: ");
scanf("%lf %lf", &start.x, &start.y);
printf("Enter end x, y co-ordinates: ");
scanf("%lf %lf", &end.x, &end.y);
printf("The distance between start and end: %lf\n", dist(start, end));
}
I am trying for the first time to use LAPACK from C to diagonalize a matrix and I am stuck.
I have been trying to modify this example http://rcabreral.blogspot.co.uk/2010/05/eigenvalues-clapack.html from zgeev to dgeev. I have looked at the DGEEV input parameters, http://www.netlib.org/lapack/explore-html/d9/d28/dgeev_8f.html but it seems I don't understand the well enough.
Hence, the code below produces:
**** On entry to DGEEV parameter number 9 had an illegal value**
EDIT: The error occurs in the call of dgeev spanning lines 48 to (including) 53.
EDIT: Note that the arguments differ from the specifications here
http://www.netlib.org/lapack/explore-html/d9/d28/dgeev_8f.html
in that they have been translated to pointers. That is necessary when using these Fortran routines in C, as explained here:
http://www.physics.orst.edu/~rubin/nacphy/lapack/cprogp.html
#include <stdio.h>
#include <math.h>
#include <complex.h>
#include <stdlib.h>
//.........................................................................
void dgeTranspose( double *Transposed, double *M ,int n) {
int i,j;
for(i=0;i<n;i++)
for(j=0;j<n;j++)
Transposed[i+n*j] = M[i*n+j];
}
//.........................................................................
// MatrixComplexEigensystem: computes the eigenvectors and eigenValues of input matrix A
// The eigenvectors are stored in columns
//.........................................................................
void MatrixComplexEigensystem( double *eigenvectorsVR, double *eigenvaluesW, double *A, int N){
int i;
double *AT = (double *) malloc( N*N*sizeof(double ) );
dgeTranspose( AT, A , N);
char JOBVL ='N'; // Compute Right eigenvectors
char JOBVR ='V'; // Do not compute Left eigenvectors
double VL[1];
int LDVL = 1;
int LDVR = N;
int LWORK = 4*N;
double *WORK = (double *)malloc( LWORK*sizeof(double));
double *RWORK = (double *)malloc( 2*N*sizeof(double));
int INFO;
dgeev_( &JOBVL, &JOBVR, &N, AT , &N , eigenvaluesW ,
VL, &LDVL,
eigenvectorsVR, &LDVR,
WORK,
&LWORK, RWORK, &INFO );
dgeTranspose( AT, eigenvectorsVR , N);
for(i=0;i<N*N;i++) eigenvectorsVR[i]=AT[i];
free(WORK);
free(RWORK);
free(AT);
}
int main(){
int i,j;
const int N = 3;
double A[] = { 1.+I , 2. , 3 , 4. , 5.+I , 6. , 7., 8., 9. + I};
double eigenVectors[N*N];
double eigenValues[N];
MatrixComplexEigensystem( eigenVectors, eigenValues, A, N);
printf("\nEigenvectors\n");
for(i=0;i<N;i++){
for(j=0;j<N;j++) printf("%e", eigenVectors[i*N + j]);
printf("\n");
}
printf("\nEigenvalues \n");
for(i=0;i<N;i++) printf("%e", eigenValues[i] );
printf("\n------------------------------------------------------------\n");
return 0;
}
You can not port directly from zgeev to dgeev. The zgeev gets a complex matrix and computes complex eigenvalues. While dgeev gets a real matrix and computes complex eigenvalues. In order to be consistent LAPACK uses WR and WI which is used for the real and imaginary part of each eigenvalue.
So note that dgeev definition is
void dgeev_(char* JOBVL, char* JOBVR, int* N, double* A, int* LDA, double* WR, double* WI, double* VL, int* LDVL, double* VR, int* LDVR, double* WORK, int* LWORK, int* INFO);
My suggestion for your example is to remove:
#include <complex.h>
remove I's from matrix of doubles:
double A[] = { 1. , 2. , 3 , 4. , 5. , 6. , 7., 8., 9.};
then double the size of eigenvalues vector:
double eigenValues[2*N];
and call dgeev using WR and WI:
double *eigenvaluesWR = eigenvaluesW;
double *eigenvaluesWI = eigenvaluesW+N;
dgeev_(&JOBVL, &JOBVR, &N, AT, &N,
eigenvaluesWR, eigenvaluesWI,
VL, &LDVL,
eigenvectorsVR, &LDVR,
WORK, &LWORK, &INFO);
I should write a program to convert Cartesian coordinates to Polar and vice versa with use of pointers, I wrote the following code but my function gives me segmentation fault. I tried to do it without the pointers and still it doesn't send my numbers to the function, can someone help to modify my pointer code? I'm new with C.
#include <stdio.h>
#include <math.h>
void cart(float *radius,float *degree)
{
float *x,*y,*radians;
*radians= (3.14159265359/180) * *degree;
*x= *radius * cos(*radians);
*y= *radius * sin(*radians);
}
int main()
{
float radius, radians, degree;
float x,y;
int M;
char C,P;
printf(" Enter C if you are converting Cartesian to Polar \n");
printf(" Enter P if you are converting Polar to Cartesian \n");
scanf("%c",&M);
if (M=='P')
{
printf("Enter the Radius and Angle separated by comma \n");
scanf("%f,%f",&radius,°ree);
cart(&radius,°ree);
printf("Cartesian form is (%f,%f) \n",x,y);
}
else if (M=='C')
{
printf("Enter values of X and Y separated by comma \n");
scanf("%f,%f",&x,&y);
radius=sqrt(((x*x)+(y*y))); // finding radius
radians=atan(y/x); //finding angle in radians
printf("Polar form is (%f,%f) \n",radius,radians); //angle is in radians
}
return 0;
}
The first thing to note is in your 'cart' function:
void cart(float *radius,float *degree)
{
float *x,*y,*radians;
*radians= (3.14159265359/180) * *degree;
*x= *radius * cos(*radians);
*y= *radius * sin(*radians);
}
You have declared pointers named x, y and radians, but they do not yet point to anything.
So when you 'de-reference' them with *x, *y and *radians you are accessing memory that does not exist, which will result in undefined behavior, possibly a segmentation fault.
I would assume that your goal is to get the x, y and radians from your main function to match up with those, so you should be passing them into the function as well.
I think what you mean is this:
void cart(float radius, float degree, float *x, float *y)
{
float radians;
if ((x == NULL) || (y == NULL))
return;
radians = 3.14159265359 / 180.0 * degree;
*x = radius * cos(radians);
*y = radius * sin(radians);
}
and call it like this
float x, y, radius, degree;
if (scanf("%f,%f", &radius, °ree) == 2)
cart(radius, degree, &x, &y);
else
{
fprintf(stderr, "error: invalid input expexted <radius,degree>\n");
exit(1);
}
In your original implementation you were declaring x and y as pointers but you hadn't initialized them, since you mean to modify them in the function you need to pass pointers which contain the addresses of the variables you wish to modify, for that you use the & address of operator.
I'm trying to parallelize a function which takes as input three arrays (x, y, and prb) and one scalar, and outputs three arrays (P1, Pt1, and Px).
The original c code is here (the outlier and E are inconsequential):
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#define max(A, B) ((A) > (B) ? (A) : (B))
#define min(A, B) ((A) < (B) ? (A) : (B))
void cpd_comp(
double* x,
double* y,
double* prb,
double* sigma2,
double* outlier,
double* P1,
double* Pt1,
double* Px,
double* E,
int N,
int M,
int D
)
{
int n, m, d;
double ksig, diff, razn, outlier_tmp, sp;
double *P, *temp_x;
P = (double*) calloc(M, sizeof(double));
temp_x = (double*) calloc(D, sizeof(double));
ksig = -2.0 * *sigma2;
for (n=0; n < N; n++) {
sp=0;
for (m=0; m < M; m++) {
razn=0;
for (d=0; d < D; d++) {
diff=*(x+n+d*N)-*(y+m+d*M); diff=diff*diff;
razn+=diff;
}
*(P+m)=exp(razn/ksig) ;
sp+=*(P+m);
}
*(Pt1+n)=*(prb+n);
for (d=0; d < D; d++) {
*(temp_x+d)=*(x+n+d*N)/ sp;
}
for (m=0; m < M; m++) {
*(P1+m)+=((*(P+m)/ sp) **(prb+n));
for (d=0; d < D; d++) {
*(Px+m+d*M)+= (*(temp_x+d)**(P+m)**(prb+n));
}
}
*E += -log(sp);
}
*E +=D*N*log(*sigma2)/2;
free((void*)P);
free((void*)temp_x);
return;
}
Here is my attempt at parallelizing it:
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
/*headers*/
void cpd_comp(
float * x, //Points to register [N*D]
float * y, //Points to be registered [M*D]
float * prb, //Vector of probabilities [N]
float * sigma2, //Square of sigma
float ** P1, //P1, output, [M]
float ** Pt1, //Pt1, output, [N]
float ** Px, //Px, output, [M*3]
int N, //Number of points, i.e. rows, in x
int M //Number of points, i.e. rows, in
);
__global__ void d_computeP(
float * P,
float * P1,
float * Px,
float * ProbabilityMatrix,
float * x,
float * y,
float * prb,
float ksig,
const int N,
const int M);
__global__ void d_sumP(
float * sp,
float * P1timessp,
float * Pxtimessp,
float * P1,
float * Px,
const int N,
const int M);
/*implementations*/
void cpd_comp(
float * x, //Points to register [N*D]
float * y, //Points to be registered [M*D]
float * prb, //Vector of probabilities [N]
float * sigma2, //Scalar
float ** P1, //P1, output, [M]
float ** Pt1, //Pt1, output, [N]
float ** Px, //Px, output, [M*3]
int N, //Number of points, i.e. rows, in x
int M //Number of points, i.e. rows, in y
){
//X is generatedPointPos
//Y is points
float
*P,
*P1timessp,
*Pxtimessp,
ksig = -2.0 * (*sigma2),
*h_sumofP = new float[N], //sum of P, on host
*d_sumofP; //sum of P, on device
cudaMalloc((void**)&P, sizeof(float)*M*N);
cudaMalloc((void**)&P1timessp,sizeof(float)*M*N);
cudaMalloc((void**)&Pxtimessp,sizeof(float)*M*N*3);
cudaMalloc((void**)&d_sumofP, sizeof(float)*N);
cudaMalloc((void**)P1, sizeof(float)*M);
cudaMalloc((void**)Px, sizeof(float)*M*3);
cudaMalloc((void**)Pt1, sizeof(float)*N);
d_computeP<<<dim3(N,M/1024+1),M>1024?1024:M>>>(P,P1timessp,Pxtimessp,NULL,x,y,prb,ksig,N,M);
for(int n=0; n<N; n++){
thrust::device_ptr<float>dev_ptr(P);
h_sumofP[n] = thrust::reduce(dev_ptr+M*n,dev_ptr+M*(n+1),0.0f,thrust::plus<float>());
}
cudaMemcpy(d_sumofP,h_sumofP,sizeof(float)*N,cudaMemcpyHostToDevice);
d_sumP<<<M/1024+1,M>1024?1024:M>>>(d_sumofP,P1timessp,Pxtimessp,*P1,*Px,N,M);
cudaMemcpy(*Pt1,prb,sizeof(float)*N,cudaMemcpyDeviceToDevice);
cudaFree(P);
cudaFree(P1timessp);
cudaFree(Pxtimessp);
cudaFree(d_sumofP);
delete[]h_sumofP;
}
/*kernels*/
__global__ void d_computeP(
float * P,
float * P1,
float * Px,
float * ProbabilityMatrix,
float * x,
float * y,
float * prb,
float ksig,
const int N,
const int M){
//thread configuration: <<<dim3(N,M/1024+1),1024>>>
int m = threadIdx.x+blockIdx.y*blockDim.x;
int n = blockIdx.x;
if(m>=M || n>=N) return;
float
x1 = x[3*n],
x2 = x[3*n+1],
x3 = x[3*n+2],
diff1 = x1 - y[3*m],
diff2 = x2 - y[3*m+1],
diff3 = x3 - y[3*m+2],
razn = diff1*diff1+diff2*diff2+diff3*diff3,
Pm = __expf(razn/ksig), //fast exponentiation
prbn = prb[n];
P[M*n+m] = Pm;
__syncthreads();
P1[N*m+n] = Pm*prbn;
Px[3*(N*m+n)+0] = x1*Pm*prbn;
Px[3*(N*m+n)+1] = x2*Pm*prbn;
Px[3*(N*m+n)+2] = x3*Pm*prbn;
}
__global__ void d_sumP(
float * sp,
float * P1timessp,
float * Pxtimessp,
float * P1,
float * Px,
const int N,
const int M){
//computes P1 and Px
//thread configuration: <<<M/1024+1,1024>>>
int m = threadIdx.x+blockIdx.x*blockDim.x;
if(m>=M) return;
float
P1m = 0,
Pxm1 = 0,
Pxm2 = 0,
Pxm3 = 0;
for(int n=0; n<N; n++){
float spn = 1/sp[n];
P1m += P1timessp[N*m+n]*spn;
Pxm1 += Pxtimessp[3*(N*m+n)+0]*spn;
Pxm2 += Pxtimessp[3*(N*m+n)+1]*spn;
Pxm3 += Pxtimessp[3*(N*m+n)+2]*spn;
}
P1[m] = P1m;
Px[3*m+0] = Pxm1;
Px[3*m+1] = Pxm2;
Px[3*m+2] = Pxm3;
}
However, to my horror, it runs much, much slower than the original version. How do I make it run faster? Please explain things thoroughly since I am very new to CUDA and parallel programming and have no experience in algorithms.
Do note that the c version has column-major ordering and the CUDA version has row-major. I have done several tests to make sure that the result is correct. It's just extremely slow and takes up a LOT of memory.
Any help is greatly appreciated!
EDIT: More information: N and M are on the order of a few thousand (say, 300-3000) and D is always 3. The CUDA version expects arrays to be device memory, except for variables prefixed with h_.
Before trying any CUDA-specific optimizations, profile your code to see where time is being spent.
Try and arrange your array reads/writes so that each CUDA thread uses a strided access pattern. For example, currently you have
int m = threadIdx.x+blockIdx.y*blockDim.x;
int n = blockIdx.x;
if(m>=M || n>=N) return;
diff1 = x1 - y[3*m],
diff2 = x2 - y[3*m+1],
diff3 = x3 - y[3*m+2],
So thread 1 will read from y[0],y[1],y[2] etc. Instead, rearrange your data so that thread 1 reads from y[0],y[M],y[2*M] and thread 2 reads from y[1],y[M+1],y[2*M+1] etc. You should follow this access pattern for other arrays.
Also, you may want to consider whether you can avoid the use of __syncthreads(). I don't quite follow why it's necessary in this algorithm, it might be worth removing it to see if it improves performance ( even if it produces incorrect results ).
The key to good CUDA performance is almost always to make as near to optimal memory access as possible. Your memory access pattern looks very similar to matrix multiplication. I would start with a good CUDA matrix multiplication implementation, being sure to understand why it's implemented the way it is, and then modify that to suit your needs.