In the following code (compiled with /clr) which is deliberately writing to the Console, the threads are either freezing the GUI or not giving any results.
Any ideas are welcome.
Also in particular the line this->Invoke(d, gcnew array { AA, BB });
is only taking 2 arguments and the code compiles. If I try 3 arguments as the delegate requires, the code doesn't compile. Any ideas why?
Please help.
#include <iostream>
#include <stdlib.h>
#using <System.dll>
#using <System.Windows.Forms.dll>
#using <System.Drawing.dll>
using namespace System;
using namespace System::ComponentModel;
using namespace System::Threading;
using namespace System::Windows::Forms;
#define NN 1000000
unsigned long long AA = 0;
unsigned long long BB = 99999999999999999ull;
unsigned long long PP[NN];
bool IsPrime(unsigned long long number)
{
if (number == 2 || number == 3)
return true;
if (number % 2 == 0 || number % 3 == 0)
return false;
int divisor = 6;
while (divisor * divisor - 2 * divisor + 1 <= number)
{
if (number % (divisor - 1) == 0)
return false;
if (number % (divisor + 1) == 0)
return false;
divisor += 6;
}
return true;
}
void NextPrime(unsigned long long a, unsigned long long * prime)
{
while (!IsPrime(++a))
{
}
* prime = a;
}
void FindAllPrimes(unsigned long long a, unsigned long long b, unsigned long long * prime)
{
unsigned long long k, n;
n = 0;
for (k = a; k < b; k++)
{
if (IsPrime(k))
{
prime[n] = k;
n++;
if (n >= 1000000)
{
break;
}
}
}
}
namespace MyNameSpace
{
public ref class Form1 : public Form
{
delegate void Delegate_Call(unsigned long long a, unsigned long long b, unsigned long long * prime);
private: void Function_1()
{
FindAllPrimes(AA, BB, PP);
}
private: void Function_2()
{
this->Function_3(AA, BB, PP);
}
private: void Function_3(unsigned long long a, unsigned long long b, unsigned long long * prime)
{
if (this->textBox1->InvokeRequired)
{
Delegate_Call^ d = gcnew Delegate_Call(this, &Form1::Function_3);
this->Invoke(d, gcnew array<Object^> { AA, BB });
int n = rand() % NN;
this->textBox1->Text = PP[n].ToString();
for (int n = 0; n < NN; n++)
std::cout << PP[n] << std::endl;
}
else
{
FindAllPrimes(AA, BB, PP);
int n = rand() % NN;
this->textBox1->Text = PP[n].ToString();
for (int n = 0; n < NN; n++)
std::cout << PP[n] << std::endl;
}
}
private: Thread^ Thread_A;
private: BackgroundWorker^ backgroundWorker1;
private: TextBox^ textBox1;
private: Button^ Button_DoNotKnow;
private: Button^ Button_UnSafeCall;
private: Button^ Button_SafeCall;
private: Button^ Button_BGWorker;
private: System::ComponentModel::IContainer^ components;
public: Form1()
{
components = nullptr;
InitializeComponent();
}
protected: ~Form1()
{
if (components != nullptr)
{
delete components;
}
}
private: void InitializeComponent()
{
this->textBox1 = gcnew System::Windows::Forms::TextBox();
this->Button_DoNotKnow = gcnew System::Windows::Forms::Button();
this->Button_UnSafeCall = gcnew System::Windows::Forms::Button();
this->Button_SafeCall = gcnew System::Windows::Forms::Button();
this->Button_BGWorker = gcnew System::Windows::Forms::Button();
this->backgroundWorker1 = gcnew System::ComponentModel::BackgroundWorker();
this->SuspendLayout();
//
// textBox1
//
this->textBox1->Location = System::Drawing::Point(30, 30);
this->textBox1->Name = "textBox1";
this->textBox1->Size = System::Drawing::Size(310, 20);
this->textBox1->TabIndex = 0;
//
// Button_DoNotKnow
//
this->Button_DoNotKnow->Location = System::Drawing::Point(20, 70);
this->Button_DoNotKnow->Name = "Button_DoNotKnow";
this->Button_DoNotKnow->TabIndex = 1;
this->Button_DoNotKnow->Text = "DoNotKnow";
this->Button_DoNotKnow->Click += gcnew System::EventHandler(this, &Form1::Button_DoNotKnow_Click);
//
// Button_UnSafeCall
//
this->Button_UnSafeCall->Location = System::Drawing::Point(100, 70);
this->Button_UnSafeCall->Name = "Button_UnSafeCall";
this->Button_UnSafeCall->TabIndex = 2;
this->Button_UnSafeCall->Text = "Unsafe Call";
this->Button_UnSafeCall->Click += gcnew System::EventHandler(this, &Form1::Button_UnSafeCall_Click);
//
// Button_SafeCall
//
this->Button_SafeCall->Location = System::Drawing::Point(180, 70);
this->Button_SafeCall->Name = "Button_SafeCall";
this->Button_SafeCall->TabIndex = 3;
this->Button_SafeCall->Text = "Safe Call";
this->Button_SafeCall->Click += gcnew System::EventHandler(this, &Form1::Button_SafeCall_Click);
//
// Button_BGWorker
//
this->Button_BGWorker->Location = System::Drawing::Point(260, 70);
this->Button_BGWorker->Name = "Button_BGWorker";
this->Button_BGWorker->TabIndex = 4;
this->Button_BGWorker->Text = "Safe BW Call";
this->Button_BGWorker->Click += gcnew System::EventHandler(this, &Form1::Button_BGWorker_Click);
//
// backgroundWorker1
//
this->backgroundWorker1->RunWorkerCompleted += gcnew System::ComponentModel::RunWorkerCompletedEventHandler( this, &Form1::backgroundWorker1_RunWorkerCompleted);
//
// Form1
//
this->ClientSize = System::Drawing::Size(380, 180);
this->Controls->Add(this->Button_BGWorker);
this->Controls->Add(this->Button_SafeCall);
this->Controls->Add(this->Button_UnSafeCall);
this->Controls->Add(this->Button_DoNotKnow);
this->Controls->Add(this->textBox1);
this->Name = "Form1";
this->Text = "Form1";
this->ResumeLayout(false);
this->PerformLayout();
}
private: void Button_DoNotKnow_Click(Object^ sender, EventArgs^ e)
{
FindAllPrimes(AA, BB, PP);
for (int n = 0; n < NN; n++)
std::cout << PP[n] << std::endl;
}
private: void Button_UnSafeCall_Click(Object^ sender, EventArgs^ e)
{
this->Thread_A = gcnew Thread(gcnew ThreadStart(this, &Form1::Function_1));
this->Thread_A->Start();
}
private: void Button_SafeCall_Click(Object^ sender, EventArgs^ e)
{
this->Thread_A = gcnew Thread(gcnew ThreadStart(this, &Form1::Function_2));
this->Thread_A->Start();
}
private: void Button_BGWorker_Click(Object^ sender, EventArgs^ e)
{
this->backgroundWorker1->RunWorkerAsync();
}
private: void backgroundWorker1_RunWorkerCompleted(Object^ sender, RunWorkerCompletedEventArgs^ e)
{
FindAllPrimes(AA, BB, PP);
for (int n = 0; n < NN; n++)
std::cout << PP[n] << std::endl;
}
};
}
[STAThread]
int main()
{
Application::EnableVisualStyles();
Application::Run(gcnew MyNameSpace::Form1());
}
First of all the reason why the Form hangs is that you are printing 1 million lines to the console from the UI thread. Try to change the NN to lets say 100 and you will see the hang goes away. If you want to keep the NN 1 million, than the only option is to move it to non UI thread.
for (int n = 0; n < NN; n++)
std::cout << PP[n] << std::endl;
The second problem, related to the invoke is, that the Invoke expects array of System::Object^. There is no implicit conversion from unsigned long long* to System::Object^, so we have to convert it to IntPtr, which will be converted to System::Object^.
private: void Function_3(unsigned long long a, unsigned long long b, unsigned long long * prime)
{
if (this->textBox1->InvokeRequired)
{
Delegate_Call^ d = gcnew Delegate_Call(this, &Form1::Function_3);
//this->Invoke(d, gcnew array<Object^> { AA, BB, IntPtr(PP) });
// btw, you can use it also this way
this->Invoke(d, AA, BB, IntPtr(PP));
//int n = rand() % NN;
// we are still on non UI thread so this will end in CrossThread eception ofcourse
//this->textBox1->Text = PP[n].ToString();
//for (int n = 0; n < NN; n++)
// std::cout << PP[n] << std::endl;
}
else
{
FindAllPrimes(AA, BB, PP);
int n = rand() % NN;
this->textBox1->Text = PP[n].ToString();
for (int n = 0; n < NN; n++)
std::cout << PP[n] << std::endl;
}
}
Related
#include <iostream>
#include <cmath>
#include <iomanip>
#include <cstring>
#include <cstddef>
class ArrayList
{
public:
ArrayList();
void expand();
void store(std::string x);
void display_size();
friend std::ostream& operator<<(std::ostream& os, const ArrayList &arr);
void clean();
ArrayList(const ArrayList &arr);
~ArrayList();
void operator=(const ArrayList& arr);
private:
int size; //total size of your dynamic array
int max; //double the size of the array.
int free_space; //total size of unused space in your dynamic array
char *array; //a pointer used to create a dynamic array
};
ArrayList::ArrayList()
{
size = 2;
free_space = 1;
array = new char[2]();
array[0] = ' ';
array[1] = '\0';
max = size;
}
void ArrayList::expand()
{
max = size + size;
char *temp = new char[max];
for( int i = 0; i < max; i++ )
{
array[i] = temp[i];
}
free_space = free_space + size;
delete [] array;
array = temp;
size = max;
}
void ArrayList::store(std::string x)
{
int taken = max - free_space;
int y = x.size();
free_space = free_space - y;
for(int i = 0; i < y; i++)
{
if(y >= size)
{
while(y >= size)
{
expand();
}
}
else
{
array[i + taken] = x[i];
}
}
}
std::ostream& operator<<(std::ostream& os, const ArrayList &arr)
{
os << arr.array;
return os;
}
void ArrayList::display_size()
{
size = max;
std::cout << "Array Content: ";
std::cout << array;
std::cout << std::endl;
std::cout << "Remaining size: ";
std::cout << free_space;
std::cout << std::endl;
}
void ArrayList::clean()
{
int x = 0;
for(int i = 0; i < size; i++)
{
if(array[i] == ' ')
{
x++;
}
}
size = x;
}
ArrayList::ArrayList(const ArrayList &arr)
{
array = new char[size + 1];
strcpy(array, arr.array);
}
ArrayList::~ArrayList()
{
delete [] array;
}
void ArrayList::operator=(const ArrayList& arr)
{
int new_length = strlen(arr.array);
if(new_length > max)
{
delete [] array;
max = new_length;
array = new char[max + 1];
}
for(int i = 0; i < new_length; i++)
{
array[i] = arr.array[i];
}
array[new_length] = '\0';
}
int main()
{
ArrayList x;
std::string y;
char ans;
x.display_size();
std::cout << "Please enter your string: ";
std::cin >> y;
x.store(y);
x.display_size();// << std::endl;
do
{
std::cout << "Please enter your string: ";
std::cin >> y;
x.store(y);
x.display_size();
std::cout << "Do you want to enter another string? (y/n) ";
std::cin >> ans;
}while(ans != 'n');
return 0;
}
My question is regarding C++ dynamic arrays. I've created a class which creates a dynamic array.
I've posted my entire code it should be runnable.
The issue stems from the use of the store and expand functions.
store takes a string and puts each character into the array, if there isn't enough space it calls expand.
expand doubles the size of the array.
Array Content:
Remaining size: 1
Please enter your string: h
Array Content: h
Remaining size: 0
Please enter your string: ello
Array Content:
Remaining size: 2
Do you want to enter another string? (y/n) n
In theory the output above should have returned "hello" however it hasn't returned anything, despite returning the 'h' earlier. I'm completely out of ideas on how to solve this.
EDIT:
I've since changed the functions according to the advice given to me:
void ArrayList::expand()
{
max = size + size;
char *temp = new char[max];
for( int i = 0; i < max; i++ )
{
temp[i] = array[i];
}
free_space = free_space + size;
delete [] array;
array = temp;
size = max;
}
void ArrayList::store(std::string x)
{
int taken = max - free_space;
int y = x.size();
free_space = free_space - y;
for(int i = 0; i < y; i++)
{
if(free_space <= 0)
{
while(free_space <= 0)
{
expand();
}
}
else
{
array[i+taken] = x[i]; //I'm cetain this didn't do anything
}
}
}
I've solved the negative number issue detailed in the comments. The only issue now is printing the number.
This "for" loop in "expand" method:
for( int i = 0; i < max; i++ )
{
array[i] = temp[i];
}
should be replaced by:
for( int i = 0; i < size; i++ )
{
temp[i] = array[i];
}
My question concerns creating an array of moving objects. The goal is to have each created "symbol" starts at x=200 when created and then moves along the x-axis. This is possible by creating individual objects, though when I try to make them appear in an array they just appear on top of each other (made the balls transparent so you can see them stack). Is there a way to create an array of individual array objects so that this doesn't happen.
int numSymbols = 100;
int symbolXPos;
int i;
Symbol[] symbols = new Symbol[numSymbols];
void setup(){
for(int i = 0; i < numSymbols; i++){
symbols[i] = new Symbol();
}
size(748,1024);
}
void draw(){
background(255,255,255);
i++;
if(i > 10){
symbols[1].addSymbol(i);
}
if(i > 100){
symbols[2].addSymbol(i);
}
if(i > 200){
symbols[3].addSymbol(i);
}
}
int symbolY = 748-2*6-89-180;
int symbolX = 200;
int symbolPos;
boolean firstLoop = true;
int xSpeed;
class Symbol{
void addSymbol(int xSpeed){
if(firstLoop == true){
symbolXPos = xSpeed;
firstLoop = false;
}else{
xSpeed = symbolX + (xSpeed - symbolXPos);
fill(0,0,0,20);
noStroke();
rect(xSpeed,symbolY, 36, 36, 18);
}
}
}
The first part that looks strange is that you have these properties that seem related to a Symbol instance, and not to the main scope:
int symbolY = 748-2*6-89-180;
int symbolX = 200;
int symbolPos;
boolean firstLoop = true;
int xSpeed;
perhaps you meant something like this:
int numSymbols = 100;
int symbolXPos;
int i;
Symbol[] symbols = new Symbol[numSymbols];
void setup() {
for (int i = 0; i < numSymbols; i++) {
symbols[i] = new Symbol();
}
size(748, 1024);
}
void draw() {
background(255, 255, 255);
i++;
if (i > 10) {
symbols[1].addSymbol(i);
}
if (i > 100) {
symbols[2].iaddSymbol(i);
}
if (i > 200) {
symbols[3].addSymbol(i);
}
}
class Symbol {
int symbolY = 748-2*6-89-180;
int symbolX = 200;
int symbolPos;
boolean firstLoop = true;
int xSpeed;
void addSymbol(int xSpeed) {
if (firstLoop == true) {
symbolXPos = xSpeed;
firstLoop = false;
} else {
xSpeed = symbolX + (xSpeed - symbolXPos);
fill(0, 0, 0, 20);
noStroke();
rect(xSpeed, symbolY, 36, 36, 18);
}
}
}
Even so, it feels like the code is more complex than it should be and there is some confusion regarding classes and objects/instances.
Let's assume you're simply going to move symbols horizontally. You could use a class as simple as this:
class Symbol{
//position
float x;
float y;
//speed
float speedX;
void updateAndDraw(){
//update position
x += speedX;
//reset to 0 if out of bounds
if(x > width) x = 0;
//draw
rect(x, y, 8, 8, 18);
}
}
It has an x,y and just a speedX for now. The x property is incremented by the speed, and the updated position is used to render the symbol on screen.
Here's a basic sketch using the above class:
int numSymbols = 100;
Symbol[] symbols = new Symbol[numSymbols];
void setup(){
size(748, 1024);
fill(0, 0, 0, 64);
noStroke();
for (int i = 0; i < numSymbols; i++) {
symbols[i] = new Symbol();
//use different speeds to easily tell the symbols appart
symbols[i].speedX = 0.01 * (i+1);
//increment y position so symbols don't overlap
symbols[i].y = 10 * i;
}
}
void draw(){
background(255);
for (int i = 0; i < numSymbols; i++) {
symbols[i].updateAndDraw();
}
}
class Symbol{
//position
float x;
float y;
//speed
float speedX;
void updateAndDraw(){
//update position
x += speedX;
//reset to 0 if out of bounds
if(x > width) x = 0;
//draw
rect(x, y, 8, 8, 18);
}
}
The key points are:
using properties related to a Symbol within the class, not outside (which would mean the global scope)
using different positions so Symbol instances don't overlap
Be sure to check out Daniel Shiffman's Objects tutorial, it provides a nice and easy intro to Object Oriented Programming
I have the following code to copy from a host variable to a __constant__ variable in CUDA
int main(int argc, char **argv){
int exit_code;
if (argc < 4) {
std::cout << "Usage: \n " << argv[0] << " <input> <output> <nColors>" << std::endl;
return 1;
}
Color *h_input;
int h_rows, h_cols;
timer1.Start();
exit_code = readText2RGB(argv[1], &h_input, &h_rows, &h_cols);
timer1.Stop();
std::cout << "Reading: " << timer1.Elapsed() << std::endl;
if (exit_code != SUCCESS){
std::cout << "Error trying to read file." << std::endl;
return FAILURE;
}
CpuTimer timer1;
GpuTimer timer2;
float timeStep2 = 0, timeStep3 = 0;
int h_numColors = atoi(argv[3]);
int h_change = 0;
int *h_pixelGroup = new int[h_rows*h_cols];
Color *h_groupRep = new Color[h_numColors];
Color *h_output = new Color[h_rows*h_cols];
Color *d_input;
int *d_pixelGroup;
Color *d_groupRep;
Color *d_output;
dim3 block(B_WIDTH, B_HEIGHT);
dim3 grid((h_cols+B_WIDTH-1)/B_WIDTH, (h_rows+B_HEIGHT-1)/B_HEIGHT);
checkCudaError(cudaMalloc((void**)&d_input, sizeof(Color)*h_rows*h_cols));
checkCudaError(cudaMalloc((void**)&d_pixelGroup, sizeof(int)*h_rows*h_cols));
checkCudaError(cudaMalloc((void**)&d_groupRep, sizeof(Color)*h_numColors));
checkCudaError(cudaMalloc((void**)&d_output, sizeof(Color)*h_rows*h_cols));
// STEP 1
//Evenly distribute all pixels of the image onto the color set
timer2.Start();
checkCudaError(cudaMemcpyToSymbol(c_rows, &h_rows, sizeof(int)));
checkCudaError(cudaMemcpyToSymbol(c_cols, &h_cols, sizeof(int)));
checkCudaError(cudaMemcpyToSymbol(c_numColors, &h_numColors, sizeof(int)));
checkCudaError(cudaMemcpy(d_input, h_input, sizeof(Color)*h_rows*h_cols, cudaMemcpyHostToDevice));
clut_distributePixels<<<grid, block>>>(d_pixelGroup);
checkCudaError(cudaMemcpy(h_pixelGroup, d_pixelGroup, sizeof(int)*h_rows*h_cols, cudaMemcpyDeviceToHost));
timer2.Stop();
std::cout << "Phase 1: " << timer2.Elapsed() << std::endl;
std::cout << h_pixelGroup[0] << ","
<< h_pixelGroup[3] << ","
<< h_pixelGroup[4] << ","
<< h_pixelGroup[7] << ","
<< h_pixelGroup[8] << std::endl;
//Do the STEP 2 and STEP 3 as long as there is at least one change of representative in a group
do {
// STEP 2
//Set the representative value to the average colour of all pixels in the same set
timer1.Start();
for (int ng = 0; ng < h_numColors; ng++) {
int r = 0, g = 0, b = 0;
int elem = 0;
for (int i = 0; i < h_rows; i++) {
for (int j = 0; j < h_cols; j++) {
if (h_pixelGroup[i*h_cols+j] == ng) {
r += h_input[i*h_cols+j].r;
g += h_input[i*h_cols+j].g;
b += h_input[i*h_cols+j].b;
elem++;
}
}
}
if (elem == 0) {
h_groupRep[ng].r = 255;
h_groupRep[ng].g = 255;
h_groupRep[ng].b = 255;
}else{
h_groupRep[ng].r = r/elem;
h_groupRep[ng].g = g/elem;
h_groupRep[ng].b = b/elem;
}
}
timer1.Stop();
timeStep2 += timer1.Elapsed();
// STEP 3
//For each pixel in the image, compute Euclidean's distance to each representative
//and assign it to the set which is closest
h_change = 0;
timer2.Start();
checkCudaError(cudaMemcpyToSymbol(d_change, &h_change, sizeof(int)));
checkCudaError(cudaMemcpy(d_groupRep, h_groupRep, sizeof(Color)*h_numColors, cudaMemcpyHostToDevice));
clut_checkDistances<<<grid, block>>>(d_input, d_pixelGroup, d_groupRep);
checkCudaError(cudaMemcpy(h_pixelGroup, d_pixelGroup, sizeof(int)*h_rows*h_cols, cudaMemcpyDeviceToHost));
checkCudaError(cudaMemcpyFromSymbol(&h_change, d_change, sizeof(int)));
timer2.Stop();
timeStep3 += timer2.Elapsed();
std::cout << "Chunche" << std::endl;
} while (h_change == 1);
std::cout << "Phase 2: " << timeStep2 << std::endl;
std::cout << "Phase 3: " << timeStep3 << std::endl;
// STEP 4
//Create the new image with the resulting color lookup table
timer2.Start();
clut_createImage<<<grid, block>>>(d_output, d_pixelGroup, d_groupRep);
checkCudaError(cudaMemcpy(h_output, d_output, sizeof(Color)*h_rows*h_cols, cudaMemcpyDeviceToHost));
timer2.Stop();
std::cout << "Phase 4: " << timer2.Elapsed() << std::endl;
checkCudaError(cudaFree(d_input));
checkCudaError(cudaFree(d_pixelGroup));
checkCudaError(cudaFree(d_groupRep));
checkCudaError(cudaFree(d_output));
timer1.Start();
exit_code = writeRGB2Text(argv[2], h_input, h_rows, h_cols);
timer1.Stop();
std::cout << "Writing: " << timer1.Elapsed() << std::endl;
delete[] h_pixelGroup;
delete[] h_groupRep;
delete[] h_output;
return SUCCESS;
}
when I print from within the kernel I get zeros for the three values
__global__
void clut_distributePixels(int *pixelGroup){
int i = blockDim.y * blockIdx.y + threadIdx.y;
int j = blockDim.x * blockIdx.x + threadIdx.x;
if(i == 0 && j == 0){
printf("a: %d\n", c_rows);
printf("b: %d\n", c_cols);
printf("c: %d\n", c_numColors);
}
while (i < c_rows) {
while (j < c_cols) {
pixelGroup[i*c_cols+j] = (i*c_cols+j)/c_numColors;
j += gridDim.x * blockDim.x;
}
j = blockDim.x * blockIdx.x + threadIdx.x;
i += gridDim.y * blockDim.y;
}
}
Either I am not copying correctly to constant memory or ... I don't know what could be wrong. Any advise !?
I posted the entire host code probably something else is messing with the constant copies.
UPDATE
Main.cu
#include "Imageproc.cuh"
int main(){
int h_change = 0;
int h_rows = 512;
cudaMemcpyToSymbol(c_rows, &h_rows, sizeof(int));
chunche<<<1,1>>>();
cudaMemcpyFromSymbol(&h_change, d_change, sizeof(int));
std::cout << "H = " << h_change << std::endl;
return 0
}
Imageproc.cuh
#ifndef _IMAGEPROC_CUH_
#define _IMAGEPROC_CUH_
#include "Utilities.cuh"
#define B_WIDTH 16
#define B_HEIGHT 16
__constant__ int c_rows;
__constant__ int c_cols;
__constant__ int c_numColors;
__device__ int d_change;
#ifdef __cplusplus
extern "C"
{
#endif
__global__
void chunche();
__global__
void clut_distributePixels(int *pixelGroup);
__global__
void clut_checkDistances(Color *input, int *pixelGroup, Color *groupRep);
__global__
void clut_createImage(Color *clutImage, int *pixelGroup, Color *groupRep);
#ifdef __cplusplus
}
#endif
#endif
Imageproc.cu
#include "Imageproc.cuh"
__global__
void chunche(){
d_change = c_rows + 1;
}
__global__
void clut_distributePixels(int *pixelGroup){
int i = blockDim.y * blockIdx.y + threadIdx.y;
int j = blockDim.x * blockIdx.x + threadIdx.x;
while (i < c_rows) {
while (j < c_cols) {
pixelGroup[i*c_cols+j] = (i*c_cols+j)/c_numColors;
j += gridDim.x * blockDim.x;
}
j = blockDim.x * blockIdx.x + threadIdx.x;
i += gridDim.y * blockDim.y;
}
}
__global__
void clut_checkDistances(Color *input, int *pixelGroup, Color *groupRep){
int i = blockDim.y * blockIdx.y + threadIdx.y;
int j = blockDim.x * blockIdx.x + threadIdx.x;
int newGroup;
while (i < c_rows) {
while (j < c_cols) {
newGroup = 0;
for (int ng = 1; ng < c_numColors; ng++) {
if (
/*If distance from color to group ng is less than distance from color to group idx
then color should belong to ng*/
(groupRep[ng].r-input[i*c_cols+j].r)*(groupRep[ng].r-input[i*c_cols+j].r) +
(groupRep[ng].g-input[i*c_cols+j].g)*(groupRep[ng].g-input[i*c_cols+j].g) +
(groupRep[ng].b-input[i*c_cols+j].b)*(groupRep[ng].b-input[i*c_cols+j].b)
<
(groupRep[newGroup].r-input[i*c_cols+j].r)*(groupRep[newGroup].r-input[i*c_cols+j].r)+
(groupRep[newGroup].g-input[i*c_cols+j].g)*(groupRep[newGroup].g-input[i*c_cols+j].g)+
(groupRep[newGroup].b-input[i*c_cols+j].b)*(groupRep[newGroup].b-input[i*c_cols+j].b)
)
{
newGroup = ng;
}
}
if (pixelGroup[i*c_cols+j] != newGroup) {
pixelGroup[i*c_cols+j] = newGroup;
d_change = 1;
}
j += gridDim.x * blockDim.x;
}
j = blockDim.x * blockIdx.x + threadIdx.x;
i += gridDim.y * blockDim.y;
}
}
__global__
void clut_createImage(Color *clutImage, int *pixelGroup, Color *groupRep){
int i = blockDim.y * blockIdx.y + threadIdx.y;
int j = blockDim.x * blockIdx.x + threadIdx.x;
while (i < c_rows) {
while (j < c_cols) {
clutImage[i*c_cols+j].r = groupRep[pixelGroup[i*c_cols+j]].r;
clutImage[i*c_cols+j].g = groupRep[pixelGroup[i*c_cols+j]].g;
clutImage[i*c_cols+j].b = groupRep[pixelGroup[i*c_cols+j]].b;
j += gridDim.x * blockDim.x;
}
j = blockDim.x * blockIdx.x + threadIdx.x;
i += gridDim.y * blockDim.y;
}
}
Utilities.cuh
#ifndef _UTILITIES_CUH_
#define _UTILITIES_CUH_
#include <iostream>
#include <fstream>
#include <string>
#define SUCCESS 1
#define FAILURE 0
#define checkCudaError(val) check( (val), #val, __FILE__, __LINE__)
typedef struct {
int r;
int g;
int b;
} vec3u;
typedef vec3u Color;
typedef unsigned char uchar;
typedef uchar Grayscale;
struct GpuTimer{
cudaEvent_t start;
cudaEvent_t stop;
GpuTimer(){
cudaEventCreate(&start);
cudaEventCreate(&stop);
}
~GpuTimer(){
cudaEventDestroy(start);
cudaEventDestroy(stop);
}
void Start(){
cudaEventRecord(start, 0);
}
void Stop(){
cudaEventRecord(stop, 0);
}
float Elapsed(){
float elapsed;
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsed, start, stop);
return elapsed;
}
};
template<typename T>
void check(T err, const char* const func, const char* const file, const int line) {
if (err != cudaSuccess) {
std::cerr << "CUDA error at: " << file << ":" << line << std::endl;
std::cerr << cudaGetErrorString(err) << " " << func << std::endl;
exit(1);
}
}
int writeGrayscale2Text(const std::string filename, const Grayscale *image, const int rows, const int cols);
int readText2Grayscale(const std::string filename, Grayscale **image, int *rows, int *cols);
int writeRGB2Text(const std::string filename, const Color *image, const int rows, const int cols);
int readText2RGB(const std::string filename, Color **image, int *rows, int *cols);
struct CpuTimer{
clock_t start;
clock_t stop;
void Start(){
start = clock();
}
void Stop(){
stop = clock();
}
float Elapsed(){
return ((float)stop-start)/CLOCKS_PER_SEC * 1000.0f;
}
};
#endif
Utilities.cu
#include "Utilities.cuh"
int writeGrayscale2Text(const std::string filename, const Grayscale *image, const int rows, const int cols){
std::ofstream fileWriter(filename.c_str());
if (!fileWriter.is_open()) {
std::cerr << "** writeGrayscale2Text() ** : Unable to open file." << std::endl;
return FAILURE;
}
fileWriter << rows << "\n";
fileWriter << cols << "\n";
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
fileWriter << (int)image[i*cols+j] << "\n";
}
}
fileWriter.close();
return SUCCESS;
}
int readText2Grayscale(const std::string filename, Grayscale **image, int *rows, int *cols){
std::ifstream fileReader(filename.c_str());
if (!fileReader.is_open()) {
std::cerr << "** readText2Grayscale() ** : Unable to open file." << std::endl;
return FAILURE;
}
fileReader >> *rows;
fileReader >> *cols;
*image = new Grayscale[(*rows)*(*cols)];
int value;
for (int i = 0; i < *rows; i++) {
for (int j = 0; j < *cols; j++) {
fileReader >> value;
(*image)[i*(*cols)+j] = (Grayscale)value;
}
}
fileReader.close();
return SUCCESS;
}
int writeRGB2Text(const std::string filename, const Color *image, const int rows, const int cols){
std::ofstream fileWriter(filename.c_str());
if (!fileWriter.is_open()) {
std::cerr << "** writeRGB2Text() ** : Unable to open file." << std::endl;
return FAILURE;
}
fileWriter << rows << "\n";
fileWriter << cols << "\n";
for (int k = 0; k < 3; k++) {
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
switch (k) {
case 0:
fileWriter << image[i*cols+j].r << "\n";
break;
case 1:
fileWriter << image[i*cols+j].g << "\n";
break;
case 2:
fileWriter << image[i*cols+j].b << "\n";
break;
}
}
}
}
fileWriter.close();
return SUCCESS;
}
int readText2RGB(const std::string filename, Color **image, int *rows, int *cols){
std::ifstream fileReader(filename.c_str());
if (!fileReader.is_open()) {
std::cerr << "** readText2Grayscale() ** : Unable to open file." << std::endl;
return FAILURE;
}
fileReader >> *rows;
fileReader >> *cols;
*image = new Color[(*rows)*(*cols)];
for (int k = 0; k < 3; k++) {
for (int i = 0; i < *rows; i++) {
for (int j = 0; j < *cols; j++) {
switch (k) {
case 0:
fileReader >> (*image)[i*(*cols)+j].r;
break;
case 1:
fileReader >> (*image)[i*(*cols)+j].g;
break;
case 2:
fileReader >> (*image)[i*(*cols)+j].b;
break;
}
}
}
}
fileReader.close();
return SUCCESS;
}
Constant memory has implicit local scope linkage - answer to this on stack overflow.
This means that the cudaMemcpyToSymbol have to be in the same generated .obj file of the kernel where you want to use it.
You do your memcopy in Main.cu, but the kernel where you use your canstant memory is in Imageproc.cu. So for the constant values are unknown for the kernel chunche.
A option to solve you're problem can be, to implement a wrapper. Just add a function in Imagepro.cu where you do the cudaMemcpyToSymbol and call the wrapper in Main.cu and pass your desired values for the constant memory in there.
I have followed median compare algorithm to find median of two sorted arrays and implemented in java. As per algorithm time complexity is O(lgn) but since it involves creating subarrays(method createSubArr) then I think as per my code it's O(n). Below is code implemented by me.
class Median
{
public static void main (String[] args)
{
int[] a = {1,12,15,26,38,40};
int[] b = {2,13,17,30,45,50};
System.out.println(getMedian(a,b,6));
}
private static int median(int[] a, int n){
if(n % 2 == 0) return (a[n/2] + a[(n/2)-1])/2;
else return a[n/2];
}
private static void show(int[] a) {
for(int i=0;i<a.length;i++) System.out.print(a[i] + " ");
System.out.println();
}
private static int[] createSubArr(int[] a, int start){
int[] sub = new int[a.length-start];
for(int i=0;i<a.length-start;i++) sub[i] = a[start+i];
return sub;
}
private static int getMedian(int[] a, int[] b,int n){
int m1,m2;
int start=-1;
int ans = -1;
if(n<=0) return -1;
if(n==1) return (a[0] + b[0])/2;
if(n==2) return (Math.max(a[0],b[0]) + Math.min(a[1],b[1]) )/2;
m1=median(a,n);
m2=median(b,n);
if(m1 < m2) {
if(n%2==0){
start = (n/2)-1;
a = createSubArr(a,start);
}
else {
start = (n/2);
a = createSubArr(a,(n/2));
}
}else{
if(n%2==0){
start = (n/2)-1;
b = createSubArr(b,start);
}
else {
start = (n/2);
b = createSubArr(b,start);
}
}
return getMedian(a,b,n-start);
}
}
Thanks.
You can add two extra parameters to getMedian: the start indices of a and b, then there is no need to copy data.
Besides, instead of the snippet:
if (n%2 == 0) {
x = n/2 - 1;
} else {
x = n/2;
}
you can simply use (n-1)/2;
You would need to adapt the following two methods to get a O(lg n) algorithm (not tested):
private static int median(int[] a, int a0, int n){
if(n % 2 == 0) return (a[a0 + n/2] + a[a0 + (n/2)-1])/2;
else return a[a0 + n/2];
}
private static int getMedian(int[] a, int a0, int[] b, int b0,int n){
int m1,m2;
int start=-1;
int ans = -1;
if(n<=0) return -1;
if(n==1) return (a[a0] + b[b0])/2;
if(n==2) return (Math.max(a[a0],b[b0]) + Math.min(a[a0+1],b[b0+1]) )/2;
m1=median(a, a0,n);
m2=median(b, b0,n);
if(m1 < m2) {
return getMedian(a, (n-1)/2, b, b0, n - (n-1)/2);
} else {
return getMedian(a, a0, b, (n-1)/2, n - (n-1)/2);
}
}
following some former advice
Multithreaded application, am I doing it right?
I think I have a threadsafe number generator using boost, but my program crashes when I input 1000 iterations. The output .csv file when graphed looks right, but I'm not sure why it's crashing.
It's using _beginthread, and everyone is telling me I should use the more (convoluted) _beingthreadex, which I'm not familiar with. If someone could recommend an example, I would greatly appreciate it.
Also... someone pointed out I should be applying a second parameter to my _beginthread for the array counting start positions, but I have no idea how to pass more than one parameter, other than attempting to use a structure, and I've read structure's and _beginthread don't get along (although, I could just use the boost threads...)
#include <process.h>
#include <windows.h>
#include <iostream>
#include <fstream>
#include <time.h>
#include <random>
#include <boost/random.hpp>
//for srand48_r(time(NULL), &randBuffer); which doesn't work
#include <stdio.h>
#include <stdlib.h>
//#include <thread>
using namespace std;
using namespace boost;
using namespace boost::random;
void myThread0 (void *dummy );
void myThread1 (void *dummy );
void myThread2 (void *dummy );
void myThread3 (void *dummy );
//for random seeds
void initialize();
//from https://stackoverflow.com/questions/7114043/random-number-generation-in-c11-how-to-generate-how-do-they-work
uniform_int_distribution<> two(1,2);
typedef std::mt19937 MyRNG; // the Mersenne Twister with a popular choice of parameters
uint32_t seed_val; // populate somehow
MyRNG rng1; // e.g. keep one global instance (per thread)
MyRNG rng2; // e.g. keep one global instance (per thread)
MyRNG rng3; // e.g. keep one global instance (per thread)
MyRNG rng4; // e.g. keep one global instance (per thread)
//only needed for shared variables
//CRITICAL_SECTION cs1,cs2,cs3,cs4; // global
int main()
{
ofstream myfile;
myfile.open ("coinToss.csv");
int rNum;
long numRuns;
long count = 0;
int divisor = 1;
float fHolder = 0;
long counter = 0;
float percent = 0.0;
//?
//unsigned threadID;
//HANDLE hThread;
initialize();
HANDLE hThread[4];
const int size = 100000;
int array[size];
printf ("Runs (uses multiple of 100,000) ");
cin >> numRuns;
for (int a = 0; a < numRuns; a++)
{
hThread[0] = (HANDLE)_beginthread( myThread0, 0, (void*)(array) );
hThread[1] = (HANDLE)_beginthread( myThread1, 0, (void*)(array) );
hThread[2] = (HANDLE)_beginthread( myThread2, 0, (void*)(array) );
hThread[3] = (HANDLE)_beginthread( myThread3, 0, (void*)(array) );
//waits for threads to finish before continuing
WaitForMultipleObjects(4, hThread, TRUE, INFINITE);
//closes handles I guess?
CloseHandle( hThread[0] );
CloseHandle( hThread[1] );
CloseHandle( hThread[2] );
CloseHandle( hThread[3] );
//dump array into calculations
//average array into fHolder
//this could be split into threads as well
for (int p = 0; p < size; p++)
{
counter += array[p] == 2 ? 1 : -1;
//cout << array[p] << endl;
//cout << counter << endl;
}
//this fHolder calculation didn't work
//fHolder = counter / size;
//so I had to use this
cout << counter << endl;
fHolder = counter;
fHolder = fHolder / size;
myfile << fHolder << endl;
}
}
void initialize()
{
//seed value needs to be supplied
//rng1.seed(seed_val*1);
rng1.seed((unsigned int)time(NULL));
rng2.seed(((unsigned int)time(NULL))*2);
rng3.seed(((unsigned int)time(NULL))*3);
rng4.seed(((unsigned int)time(NULL))*4);
};
void myThread0 (void *param)
{
//EnterCriticalSection(&cs1); //aquire the critical section object
int *i = (int *)param;
for (int x = 0; x < 25000; x++)
{
//doesn't work, part of merssene twister
//i[x] = next();
i[x] = two(rng1);
//original srand
//i[x] = rand() % 2 + 1;
//doesn't work for some reason.
//uint_dist2(rng);
//i[x] = qrand() % 2 + 1;
//cout << i[x] << endl;
}
//LeaveCriticalSection(&cs1); // release the critical section object
}
void myThread1 (void *param)
{
//EnterCriticalSection(&cs2); //aquire the critical section object
int *i = (int *)param;
for (int x = 25000; x < 50000; x++)
{
//param[x] = rand() % 2 + 1;
i[x] = two(rng2);
//i[x] = rand() % 2 + 1;
//cout << i[x] << endl;
}
//LeaveCriticalSection(&cs2); // release the critical section object
}
void myThread2 (void *param)
{
//EnterCriticalSection(&cs3); //aquire the critical section object
int *i = (int *)param;
for (int x = 50000; x < 75000; x++)
{
i[x] = two(rng3);
//i[x] = rand() % 2 + 1;
//cout << i[x] << endl;
}
//LeaveCriticalSection(&cs3); // release the critical section object
}
void myThread3 (void *param)
{
//EnterCriticalSection(&cs4); //aquire the critical section object
int *i = (int *)param;
for (int x = 75000; x < 100000; x++)
{
i[x] = two(rng4);
//i[x] = rand() % 2 + 1;
//cout << i[x] << endl;
}
//LeaveCriticalSection(&cs4); // release the critical section object
}
This isn't really an answer to the issue exactly (I did read that _beginThread is prone to memory leaks), but I did create a v5 that used only boost threads (commented lines removed). Btw, I have no idea where it was crashing
#include <process.h>
#include <windows.h>
#include <iostream>
#include <fstream>
#include <time.h>
#include <random>
#include <boost/thread.hpp>
#include <boost/random.hpp>
//for srand48_r(time(NULL), &randBuffer); which doesn't work
#include <stdio.h>
#include <stdlib.h>
//#include <thread>
using namespace std;
using namespace boost;
using namespace boost::random;
//for random seeds
void initialize();
void workerFunc1(void *dummy );
void workerFunc2(void *dummy );
void workerFunc3(void *dummy );
void workerFunc4(void *dummy );
//from http://stackoverflow.com/questions/7114043/random-number-generation-in-c11-how-to-generate-how-do-they-work
uniform_int_distribution<> two(1,2);
typedef std::mt19937 MyRNG; // the Mersenne Twister with a popular choice of parameters
uint32_t seed_val; // populate somehow
MyRNG rng1; // e.g. keep one global instance (per thread)
MyRNG rng2; // e.g. keep one global instance (per thread)
MyRNG rng3; // e.g. keep one global instance (per thread)
MyRNG rng4; // e.g. keep one global instance (per thread)
//only needed for shared variables
//CRITICAL_SECTION cs1,cs2,cs3,cs4; // global
int main()
{
ofstream myfile;
myfile.open ("coinToss.csv");
int rNum;
long numRuns;
long count = 0;
int divisor = 1;
float fHolder = 0;
long counter = 0;
float percent = 0.0;
//?
//unsigned threadID;
//HANDLE hThread;
initialize();
HANDLE hThread[4];
const int size = 100000;
int array[size];
printf ("Runs (uses multiple of 100,000) ");
cin >> numRuns;
for (int a = 0; a < numRuns; a++)
{
thread workerThread1(workerFunc1, (void*)(array) );
thread workerThread2(workerFunc2, (void*)(array) );
thread workerThread3(workerFunc3, (void*)(array) );
thread workerThread4(workerFunc4, (void*)(array) );
//waits for threads to finish before continuing
workerThread1.join();
workerThread2.join();
workerThread3.join();
workerThread4.join();
//dump array into calculations
//average array into fHolder
//this could be split into threads as well
for (int p = 0; p < size; p++)
{
counter += array[p] == 2 ? 1 : -1;
//cout << array[p] << endl;
//cout << counter << endl;
}
//this fHolder calculation didn't work
//fHolder = counter / size;
//so I had to use this
//cout << counter << endl;
fHolder = counter;
fHolder = fHolder / size;
myfile << fHolder << endl;
}
}
void initialize()
{
//seed value needs to be supplied
//rng1.seed(seed_val*1);
rng1.seed((unsigned int)time(NULL));
rng2.seed(((unsigned int)time(NULL))*2);
rng3.seed(((unsigned int)time(NULL))*3);
rng4.seed(((unsigned int)time(NULL))*4);
};
//recieves array, and spot of array to work on,
void workerFunc1(void *param)
{
int *i = (int *)param;
for (int x = 0; x < 25000; x++)
{
//gets a random 1 or 2 value
i[x] = two(rng1);
//converts to -1 or +1, then adds it to sum (I would love to combine these two)
}
//return sum;
}
void workerFunc2(void *param)
{
int *i = (int *)param;
for (int x = 25000; x < 50000; x++)
{
//gets a random 1 or 2 value
i[x] = two(rng2);
//converts to -1 or +1, then adds it to sum (I would love to combine these two)
}
//return sum;
}
void workerFunc3(void *param)
{
int *i = (int *)param;
for (int x = 50000; x < 75000; x++)
{
//gets a random 1 or 2 value
i[x] = two(rng3);
//converts to -1 or +1, then adds it to sum (I would love to combine these two)
}
//return sum;
}
void workerFunc4(void *param)
{
int *i = (int *)param;
for (int x = 75000; x < 100000; x++)
{
//gets a random 1 or 2 value
i[x] = two(rng4);
//converts to -1 or +1, then adds it to sum (I would love to combine these two)
}
//return sum;
}