To speed up C-code random selection with further modification array - arrays

I'm trying to write some code in C-language. The main idea is that I have an input linear array that consists the readius for each pixel (`````` - something like that, moreover, the length of pix_r, for instance, for picture with size (128,512) will be 128 * 512). And I need for each radius random selected fixed numbers of pixels and other set to -1. What I mean:
r = 2 in pix_r = [1, 8, 2, 2, 4, 6, 7, 7, 8, 2, 8] is in the following positions currentR = [2, 3, 9], and let's NumberOfRandomS = 2, so one of the possible result can be pix_r = [*, *, 2, -1, *, *, *, *, *, 2, *]. and the same should be doe for each r. If number of items == r is less than NumberOfRandomS, we should pick up all elements without any modification.
I try to write this in C-code. But I am a newbie and don't know all features and tips for optimization. My first aprroach of writing this function is
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <math.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdint.h>
#include <stddef.h>
#include <limits.h>
#include <string.h>
#include <ctype.h>
const int NumberOfRandomS = 5;
void RandomSelected(size_t numEl, int maxRad, int *pix_r){
srand(time(NULL));
int lenRandomIndex = NumberOfRandomS*sizeof(int);
int* RandomIndex = (int*) malloc(lenRandomIndex);
memset(RandomIndex, 0, lenRandomIndex);
int lenNumPerShell1 = (maxRad) * sizeof(int);
int* numPerShell1 = (int*) malloc(lenNumPerShell1);
memset(numPerShell1, 0, lenNumPerShell1);
//Calculate the number of each pix_r per shell
for (int i=0; i<numEl; ++i){
numPerShell1[pix_r[i]]++;
}
//Main part for random selection of NumberOfRandomS items
//for each pix_r
for(int r=0; r<maxRad; ++r){
int lenShellR = numPerShell1[r];
//if number of items for this r is less than should be
//selected, skip it. It means that we selected all items
//for this r
if(lenShellR <= NumberOfRandomS){
continue;
}
int lenCurrentR = lenShellR*sizeof(int);
int* currentR = (int *) malloc(lenCurrentR); // array of indexes for this r
memset(currentR, 0, lenCurrentR);
//filling currentR array with all indexes for this r
int cInd = 0;
for(register int j=0; j<numEl; ++j){
if(pix_r[j] == r){
currentR[cInd] = j;
cInd++;
}
}
//generate random indexes without repetiotion that should be selected from currentR
//this indexes help us to save r value in these positions and others indexes for this r
//set to -1
int value[NumberOfRandomS];
for (int i=0;i<NumberOfRandomS;++i)
{
int check; //variable to check or index is already used for this r
size_t pick_index; //variable to store the random index in
do
{
pick_index = rand() % lenShellR;
//check or index is already used for this r:
check=1;
for (int j=0;j<i;++j)
if (pick_index == value[j]) //if index is already used
{
check=0; //set check to false
break; //no need to check the other elements of value[]
}
} while (check == 0); //loop until new, unique index is found
value[i]=pick_index; //store the generated index in the array
RandomIndex[i] = currentR[pick_index];
}
//set all positions for each r that are not on random selected to -1
for(register int k=0; k < lenShellR; ++k)
{
int flag = 0; // flag will be 1 if this index for this r in RandomIndex
for (register int q = 0; q < NumberOfRandomS; ++q)
{
if(RandomIndex[q] == currentR[k])
{
flag = 1; //this index is found
}
}
if(flag != 1)
{
//index for this r not in RandomIndex, so set this index for this r to -1
pix_r[currentR[k]] = -1;
}
}
}
return;
}
I tried to optimize a little bit, but different resources contradict each other and after testing it didn't show any speeding up:
void ModRandomSelected(size_t numEl, int maxRad, int *pix_r){
srand(time(NULL));
int lenRandomIndex = NumberOfRandomS*sizeof(int);
int* RandomIndex = (int*) malloc(lenRandomIndex);
memset(RandomIndex, 0, lenRandomIndex);
int lenNumPerShell1 = (maxRad) * sizeof(int);
int* numPerShell1 = (int*) malloc(lenNumPerShell1);
memset(numPerShell1, 0, lenNumPerShell1);
//Calculate the number of each pix_r per shell
for (int i=numEl-1; i>=0; --i){
numPerShell1[pix_r[i]]++;
}
//Main part for random selection of NumberOfRandomS items
//for each pix_r
for(int r=maxRad-1; r>=0; --r)
{
int lenShellR = numPerShell1[r];
//if number of items for this r is less than should be
//selected, skip it. It means that we selected all items
//for this r
if(lenShellR <= NumberOfRandomS){
continue;
}
int lenCurrentR = lenShellR*sizeof(int);
int* currentR = (int *) malloc(lenCurrentR); // array of indexes for this r
memset(currentR, 0, lenCurrentR);
//filling currentR array with all indexes for this r
int cInd = 0;
for(register int i = numEl-1; i>=0; --i)
{
if(pix_r[i] == r){
currentR[cInd] = i;
cInd++;
}
}
//generate random indexes without repetiotion that should be selected from currentR
//this indexes help us to save r value in these positions and others indexes for this r
//set to -1
int value[NumberOfRandomS];
for (int i=NumberOfRandomS-1; i>=0; --i)
{
int check; //variable to check or index is already used for this r
size_t pick_index; //variable to store the random index in
do
{
pick_index = rand() % lenShellR;
//check or index is already used for this r:
check=1;
for (int j=0;j<i;++j)
if (pick_index == value[j]) //if index is already used
{
check=0; //set check to false
break; //no need to check the other elements of value[]
}
} while (check == 0); //loop until new, unique index is found
value[i]=pick_index; //store the generated index in the array
RandomIndex[i] = currentR[pick_index];
}
//set all positions for each r that are not on random selected to -1
for(register int k=lenShellR-1; k >= 0; --k)
{
int flag = 0; // flag will be 1 if this index for this r in RandomIndex
for (register int q = NumberOfRandomS-1; q >= 0; --q)
{
if(RandomIndex[q]== currentR[k]){
flag = 1; //this index is found
}
}
if(flag != 1)
{
//index for this r not in RandomIndex, so set this index for this r to -1
pix_r[currentR[k]] = -1;
}
}
}
return;
}
I will be very thankful if you help and explain what and how I can improve this function.

The code is rather messy and hard to follow, so I can't be bothered to figure out what it actually does. The algorithm overall might be the true bottleneck. Anyway, here's some misc comments & advise of potential problems that I spotted:
Ensure to only call srand once in the whole program.
The register keyword is obsolete, from a time when compilers were bad at determining when to place variables in registers. Nowadays, compilers are more competent at this than programmers, don't use register, it is bloat.
Similarly, replacing up-counting loops with down-counting ones for the sake of performance is an obsolete technique nowadays sorting under "pre-mature optimization". The compiler can do that optimization for you - so write the code as readable as possible instead.
Avoid iterating over the same range/array multiple times.
Keep loop conditions as trivial as possible. This helps readability and data cache optimization both. The ideal for loop should look like for(int i=0; i<n; i++).
malloc is much slower than static or local storage. In this case you have a few items and only need to access them locally, so all malloc calls should be swapped with local arrays. You may use VLA here, to get stack allocation instead. That is, drop this code:
int lenRandomIndex = NumberOfRandomS;
int* RandomIndex = (int*) malloc(lenRandomIndex);
memset(RandomIndex, 0, lenRandomIndex);
and replace with this code:
int RandomIndex [NumberOfRandomS];
You have similar situations all over the code. And you probably don't need to set it to zero, because:
Don't zero-initialize or memset arrays that you indeed to fill with data the first thing you do anyway. This is a rather big performance problem in the posted code.
Empty return ; at the end of a function returning void is just clutter.
Investigate if some of these searches could be replaced with binary search. It means sorting the data in advance but might lead to much faster code overall.
Minimize the amount of checks, particularly inside loops.
Split up your big monster functions into several. Local static functions are very certain to get inlined and they improve readability a lot. Splitting functions into several smaller also allows much easier benchmarking.
Please benchmark your code when optimizations are enabled.

Related

Find the most frequent elements in an array of Integers

I have to find all of the elements which have the maximum frequency. For example, if array a={1,2,3,1,2,4}, I have to print as 1, also 2. My code prints only 2. How to print the second one?
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define n 6
int main(){
int a[n]={1,2,3,1,2,4};
int counter=0,mostFreq=-1,maxcnt=0;
for(int i=0;i<n;i++){
for(int j=i+1;j<n;j++){
if(a[i]==a[j]){
counter++;
}
}
if(counter>maxcnt){
maxcnt=counter;
mostFreq=a[i];
}
}
printf("The most frequent element is: %d",mostFreq);
}
How to print the second one?
The goal it not only to print a potential 2nd one, but all the all of the elements which have the maximum frequency.
OP already has code that determines the maximum frequency. Let us build on that. Save it as int target = mostFreq;.
Instead of printing mostFreq, a simple (still O(n*n)) approach would perform the same 2-nested for() loops again. Replace this 2nd:
if(counter>maxcnt){
maxcnt=counter;
mostFreq=a[i];
}
With:
if(counter == target){
; // TBD code: print the a[i] and counter.
}
For large n, a more efficient approach would sort a[] (research qsort()). Then walk the sorted a[] twice, first time finding the maximum frequency and the 2nd time printing values that match this frequency.
This is O(n* log n) in time and O(n) in memory (if a copy of the original array needed to preserve the original). If also works well with negative values or if we change the type of a[] from int to long long, double, etc.
The standard student solution to such problems would be this:
Make a second array called frequency, of the same size as the maximum value occurring in your data.
Init this array to zero.
Each time you encounter a value in the data, use that value as an index to access the frequency array, then increment the corresponding frequency by 1. For example freq[value]++;.
When done, search through the frequency array for the largest number(s). Optionally, you could sort it.
We can (potentially) save some effort in an approach with unsorted data by creating an array of boolean flags to determine whether we need to count an element at all.
For the array {1, 2, 3, 1, 2, 4} we do have nested for loops, so O(n) complexity, but we can avoid the inner loop entirely for repeated numbers.
#include <stdio.h>
#include <stdbool.h>
int main(void) {
int arr[] = {1, 2, 3, 1, 2, 4};
size_t arr_size = sizeof(arr) / sizeof(*arr);
bool checked[arr_size];
for (size_t i = 0; i < arr_size; i++) checked[i] = false;
unsigned int counts[arr_size];
for (size_t i = 0; i < arr_size; i++) counts[i] = 0;
for (size_t i = 0; i < arr_size; i++) {
if (!checked[i]) {
checked[i] = true;
counts[i]++;
for (size_t j = i+1; j < arr_size; j++) {
if (arr[i] == arr[j]) {
checked[j] = true;
counts[i]++;
}
}
}
}
unsigned int max = 0;
for (size_t i = 0; i < arr_size; i++) {
if (counts[i] > max) max = counts[i];
}
for (size_t i = 0; i < arr_size; i++) {
if (counts[i] == max)
printf("%d\n", arr[i]);
}
return 0;
}

How could I avoid the pc to pick the same number?

If I have an array like this
int numbers[10] = {1,1,3,3,5,5,7,7,8,8};
And I want to pick one randomly
i = numbers[rand()% 10];
How could I avoid the pc to pick the same number more than twice for this example. Because like you can see in the array the same number is repeated 2 times. So I would like, for example, the number 8 to be choose just twice and same for the other numbers.
I know that I could do something like mark an element as "deleted", e.g., by setting it to 0, then if the number chosen has been deleted, you choose again. But I don't know how to do it properly, so If anyone here can help me I would be very grateful.
You can generate the random number so it's equal to the yet unused number of elements and move unused elements to the front of the array.
#define SZ 10
int numbers[SZ] = {1,1,3,3,5,5,7,7,8,8};
for (int i = 0; i < SZ; ++i)
{
// Generate the random number in the range [0 .. UNUSED-ELEMENTS]
// First loop in range [0..9]
// Second loop in range [0..8]
// and so on
int r = rand() % (SZ-i);
int d = numbers[r];
printf("%d ", d);
// Overwrite the used element with an unused element, i.e. last unused,
// so that unused elements are always at the lowest array index
numbers[r] = numbers[SZ-i-1];
}
printf("\n");
Implemented with an array + "swap and pop":
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
static int pick_number(int *numbers, int max)
{
int index = rand() % max;
int result = numbers[index];
// Swap
numbers[index] = numbers[max-1];
return result;
}
int main(int argc, char *argv[])
{
srand(time(NULL));
int numbers[10] = {1,1,3,3,5,5,7,7,8,8};
for (int i = 10; i > 0; i--) {
printf("got: %d\n", pick_number(numbers, i));
}
return 0;
}

C How to Keep a Random Variable From Repeating the Same Number

So I'm just learning C and I would like to know how you could prevent a variable randomized with the rand() function from repeating the same number. I have a script which simply randomizes and prints a variable in a for loop 4 times. How could I make it so the variable never gets the same number after each time it uses the rand() function?
#include <stdio.h>
#include <stdlib.h>
int randomInt;
int main()
{
srand(time(0));
for (int i = 0; i < 4; ++i) {
randomInt = rand() % 4;
printf("%d\n", randomInt);
}
return 0;
}
On most machines, int is 32 bits. So after 232 iterations, you are sure that you'll get some repetition (and probably much before).
If you restrict yourself to much less loops, consider e.g. keeping an array of previously met random numbers (or some hash table, or some binary tree, or some other container).
For a loop repeated only 4 times, keeping an array of (at most 4-1) previously emitted numbers is quite simple, and efficient enough.
Read also about the pigeonhole principle.
A slightly different approach.
int set[] = {0, 1, 2, 3 } ;
srand(time(0));
shuffle(set,4);
using the shuffle algorithm given in this question
https://stackoverflow.com/a/6127606/9288531
I'm guessing that you are getting the same numbers because your are running your program multiple times within the same second. If time(0) hasn't changed, you will have the same seed and the same random numbers generated. Unless your program runs extremely quickly, I imagine using a seed based on microseconds instead of seconds would work:
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
int randomInt;
int main()
{
struct timeval my_microtimer;
gettimeofday(&t1, NULL);
srand(t1.tv_sec * my_microtimer.tv_usec);
for (int i = 0; i < 4; ++i) {
randomInt = rand() % 4;
printf("%d\n", randomInt);
}
return 0;
}
What you could do is keeping track of each number you already generated.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int hasMyNumberAlreadyBeenGenerated(int number, int generatedNumbers[], int size){
for(int i = 0; i < size + 1; i++){
//If you already generated the number, it should be present somewhere in your array
if(generatedNumbers[i] == number) return 1;
//If you did not, find the first available space in your array, and put the number you generated into that space
if(generatedNumbers[i] == 0){
generatedNumbers[i] = number;
break; //No need to continue to check the array
}
}
return 0;
}
int main()
{
int randomInt;
int generatedNumbers[4];
//We set "0" in all the array, to be sure that the array doesn't contain unknown datas when we create it
memset(generatedNumbers, 0x0, sizeof(generatedNumbers));
srand(time(0));
for (int i = 0; i < 4; ++i) {
randomInt = rand() % 4 + 1;
//As long as the number you generate has already been generated, generate a new one
while(hasMyNumberAlreadyBeenGenerated(randomInt, generatedNumbers, i) == 1){
randomInt = rand() % 4 + 1;
}
printf("generated : %d\n", randomInt);
}
return 0;
}
The problem with this method is that you can't generate a 0, because if you do you'll endlessly loop.
You can bypass this problem using a dynamic array using malloc() function.
If you want to write clean code you should define how many numbers you want to generate with a #define.
What you seem to be asking is a non-random set of numbers 0 to 3 in a random order. Given that;
int set[] = {0, 1, 2, 3 } ;
int remaining = sizeof(set) / sizeof(*set) ;
while( remaining != 0 )
{
int index = rand() % sizeof(set) / sizeof(*set) ;
if( set[index] > 0 )
{
printf( "%d\n", set[index] ) ;
set[index] = -1 ;
remaining-- ;
}
}
For very large sets, this approach may not be practical - the number of iterations necessary to exhaust the set is non-deterministic.

Shuffle an array of strings randomly

I am writing a program that reads a text file as an input and randomly shuffles the array of strings for the user.
I have written a program that shuffles the string array randomly but I want to do it in a way that no two elements that are the same are beside each other.
Here's an example:
The original array would look like this
{1,2,3,4,5,1,2}
The shuffled array would look like this
{5,3,1,2,4,2,1}
But currently my program creates an output array of this
{5,1,1,3,2,4,2}
Here is my code that shuffles the elements randomly:
int i;
char s[11][100];
char line[100], t[100];
/*Open the text file*/
FILE *fp;
fp = fopen("players.txt", "r");
/*Read each line and put it into an element in an array.
Each line will be in a seperate element in the array.*/
i=0;
while(fgets(line, 100, fp)!= NULL){
strcpy(s[i], line);
i++;
}
/*Generates a random number stored in j and shuffles the order of the array randomly*/
for(i=1; i<10; i++){
j = rand()%(i+1);
strcpy(t, s[j]);
strcpy(s[j], s[i]);
strcpy(s[i], t);
}
As far as I know, there is no better solution than repeatedly running the Fisher-Yates shuffle until you find an arrangement without adjacent duplicates. (That's usually called a rejection strategy.)
The amount of time this will take depends on the probability that a random shuffle has adjacent duplicates, which will be low if there are few duplicates and could be as much as 1.0 if more than half of the set is the same majority element. Since the rejection strategy never terminates if there is no possible qualifying arrangement, it could be worth the trouble to verify that a solution is possible, which means that there is no majority element. There's an O(n) algorithm for that, if necessary, but given the precise details you provided, it shouldn't be necessary (yet).
You can reject immediately rather than continuing to the end of the shuffle, which significantly cuts down on the cost of running the algorithm. So just use your shuffle algorithm, but restart the counter if you place an element beside one of its twins.
By the way, using strcpy to move elements around is really inefficient. Just shuffle the pointers.
Here's some code adapted from this answer. I've assumed that the duplicates are exact, for simplicity; perhaps you have some other way of telling (like looking only at the first word):
void shuffle(const char* names[], size_t n) {
for (size_t i = 0; i < n;) {
size_t j = i + rand() % (n - i);
/* Reject this shuffle if the element we're about to place
* is the same as the previous one
*/
if (i > 0 && strcmp(names[j], names[i-1]) == 0)
i = 0;
else {
/* Otherwise, place element i and move to the next one*/
const char* t = names[i];
names[i] = s[j];
names[j] = t;
++i;
}
}
}
For your use case, where you have 10 objects with frequencies 3, 3, 2, and 2, there are 605,376 valid arrangements, out 3,628,800 (10!) total arrangements, so about five of every six shuffles will be rejected before you find a valid arrangement, on average. However, the early termination means that you will do less than six times as much work as a single shuffle; empirical results indicate that it takes about 33 swaps to produce a valid shuffle of 10 objects with the above frequencies.
Note: rand()%k is not a very good way to generate a uniform distribution of integers from 0 to k-1. You'll find lots of advice about that on this site.
import java.util.Random;
import java.util.Arrays;
public class ShuffleRand
{
static void randomize( int arr[], int n)
{
Random r = new Random();
for (int i = n-1; i > 0; i--) {
int j = r.nextInt(i+1);
int temp = arr[i];
arr[i] = arr[j];
arr[j] = temp;
}
System.out.println(Arrays.toString(arr));
}
public static void main(String[] args)
{
int[] arr = {1, 2, 3, 4, 5, 6, 7, 8};
int n = arr.length;
randomize (arr, n);
}
}
This function will shuffle array of strings randomly:
void shuffle(char *arr[], int size)
{
srand(time(NULL));
for (int i = 0; i < size; i++)
{
int b = rand() % size;
int a = rand() % size;
char *tmp = arr[a];
arr[a] = arr[b];
arr[b] = tmp;
}
}

C programming: Generate random n-combinations from given set?

Say I have a pre-specified set S of m items. I would like to generate a random combination of n (unique) items taken from S.
Is there an easy way to implement this in C? I looked into rand() but it didn't seem to do what I want.
(EDIT to add more details)
The specific problem is to randomly choose n distinct elements from an array of size m. My first instinct is to do this:
idx_array = []
int idx = rand() % m
[if idx not in idx_array, add to idx_array. Otherwise repeat above line. Repeat until idx_array has size n]
But it doesn't look like this process is truly random. I'm still new to C and really just want to know if there's a built-in function for this purpose.
Any help appreciated.
Instead of generating a number from 1 to n with the possibility of duplicate, shuffle your array and then pick out of the first n elements:
#include <stdio.h>
#include <stdlib.h>
// Randomly shuffle a array
void shuffle (int * array, int n) {
int i, j, tmp;
for (i = n - 1; i > 0; i--) {
j = arc4random_uniform(i + 1);
tmp = array[j];
array[j] = array[i];
array[i] = tmp;
}
}
int main (int argc, char const *argv[])
{
const int m = 5;
const int n = 3;
int s[m] = {10, 20, 30, 40, 50};
// Make a copy of s before shuffling it
int t[m];
for(size_t i = 0; i < m; i++)
{
t[i] = s[i];
}
shuffle(t, m);
// Now, the first n elements of t is what you want
for(size_t i = 0; i < n; i++)
{
printf("%d ", t[i]);
}
return 0;
}
Credit to Roland Illig for the Fisher-Yate shuffling function.
This is a sampling problem. There are a host of sampling algorithms but a straightforward algorithm which does the job pretty well is known as Reservoir Sampling. Refer geekforgeeks for more details on reservoir sampling.

Resources