Segmentation fault (core dumped) on C loop with pthreads - c

I keep getting a segmentation fault on the following code and cannot figure out why.
Please help me fix it.
#include <string.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "tempo.h"
int num_thread = 12;
//int ini[20];
//time_t start, end;
int threadShare;
int end = 127;
int start = 32;
void *loop(void *arg);
pthread_t *m;
void
main(int argc, char *argv[])
{
double qt = (((double) end - (double) start) / (double) num_thread);
threadShare = round(qt);
m = malloc(sizeof(pthread_t) * num_thread);
int ptStart[num_thread + 1];
// creates the pthreads and divides the work
for (int i = 0; i < num_thread; i++) {
pthread_create(&m[i], NULL, loop, (void *) &ptStart[i]);
ptStart[i + 1] = ptStart[i] + threadShare;
// i, ptStart[i]);
}
// closes the pthreads
for (int i = 0; i < num_thread; i++) {
pthread_join(ptStart[i], NULL);
}
}
void *
loop(void *arg)
{
//password to be broken
char senha[40] = "!!!#(,4#";
char linha[40];
// 8 loops parsing through the ascii characters
for (char v1 = 32; v1 < 127; v1++) {
linha[0] = v1;
for (char v2 = 32; v2 < 127; v2++) {
linha[1] = v2;
for (char v3 = 32; v3 < 127; v3++) {
linha[2] = v3;
for (char v4 = 32; v4 < 127; v4++) {
linha[3] = v4;
for (char v5 = 32; v5 < 126; v5++) {
linha[4] = v5;
for (char v6 = 32; v6 < 127; v6++) {
linha[5] = v6;
for (char v7 = 32; v7 < 127; v7++) {
linha[6] = v7;
for (char v8 = 32; v8 < 127; v8++) {
linha[7] = v8;
printf("%s\n", linha);
// compares the password with the
// characters parsed in the loop
int ok = strcmp(senha, linha);
if (ok == 0) {
exit(0);
}
}
}
}
}
}
}
}
}
}
This code should divide the task of brute forcing a password among a variable number of pthreads. I am a newbie at c and cannot figure out the issue.
I am pretty sure the issue occurs inside the loop, as v1 will iterate through the ascii characters before the segmentation fault occurs.
Edit: Tried to clean up the code a little, alas i am a total beginner with c

Related

Get value at memory address of a process and modify it

I was trying to emulate what Cheat Engine does on mac os in getting the memory address from values and modifying it. I have done this so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <libproc.h>
#include <mach/mach_init.h>
// Get array of all process ids
uint32_t* get_pids(uint16_t* size) {
uint32_t number_of_pids = proc_listpids(1, 0, NULL, 0);
uint32_t* buffer = malloc(sizeof(uint32_t) * number_of_pids);
uint8_t return_code = proc_listpids(1, 0, buffer, sizeof(buffer) * number_of_pids);
uint16_t sum = 0;
for(int i = 0; i < number_of_pids; i++) {
if(buffer[i] != 0) {
sum++;
}
}
uint32_t* final = malloc(sizeof(uint32_t) * sum);
for(int i = 0, t = 0; i < number_of_pids; i++) {
if(buffer[i]) {
final[t++] = buffer[i];
}
}
*size = sum;
return final;
}
int main() {
uint16_t size;
uint32_t* pids = get_pids(&size);
uint16_t maxpathlength = 1024;
uint16_t path_size = maxpathlength * 4;
char path_buffer[path_size];
uint32_t pid = 0;
for(int i = 0; i < size; i++) {
memset(path_buffer, '\0', sizeof(path_buffer));
uint8_t return_code = proc_pidpath(pids[i], path_buffer, path_size);
if(strstr(path_buffer, "Geometry Dash")) {
pid = pids[i];
}
//printf("PID: %d, Process: %s\n", pids[i], path_buffer);
}
mach_port_name_t port = 0;
if(task_for_pid(mach_task_self(), pid, &port)) {
printf("Run as root!\n");
}
printf("%d\n", port);
return 0;
}
So I got there and now have the mach port of the target pid however I am not sure where to go from here as I have found practically 0 good documentation on the mach_vm methods and anything I try fails. How should I go about doing this?

Longest Palindromic Subsequence Multithread in C

I'm trying to learn how to multithread with c, and thought that the longest palindromic subsequence problem would be a good place to start.
The idea is that we run two threads and compare their results to find the answer. One thread deals with "odd" subsequences, the other with "even" ones.
Although the code below seems to work, my question is: where in the program should I check for multi-threading errors? It is very new to me so I just need to know what parts may be prone to the issues that multi-threading brings.
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
struct str{
char* seq;
int len;
};
void *odd(void* arg){
struct str index = *(struct str*)arg;
int maxAns = 1;
for(int i = 1; i < index.len; i++){
int low = i - 1;
int high = i + 1;
int currMax = 1;
while(low >= 0 && high < index.len && index.seq[low] == index.seq[high]){
low--;
high++;
currMax=currMax+2;
}
if(currMax > maxAns){
maxAns = currMax;
}
}
int* res = malloc(sizeof(int));
*res = maxAns;
free(arg);
return (void*)res;
}
void *even(void* arg){
struct str index = *(struct str*)arg;
int maxAns = 0;
for(int i = 0; i < index.len; i++){
int low = i;
int high = i + 1;
int currMax = 0;
while(low >= 0 && high < index.len && index.seq[low] == index.seq[high]){
low--;
high++;
currMax=currMax+2;
}
if(currMax > maxAns){
maxAns = currMax;
}
}
int* res = malloc(sizeof(int));
*res = maxAns;
free(arg);
return (void*)res;
}
int main(void){
char seq0[] = "aaasaaasadaadsdafa";
int len = sizeof(seq0)/sizeof(seq0[0])-1;
struct str* s0 = malloc(sizeof(struct str));
struct str* s1 = malloc(sizeof(struct str));
s0->seq = (char*)seq0;
s1->seq = (char*)seq0;
s0->len = len;
s1->len = len;
pthread_t t0;
pthread_t t1;
int* res0;
int* res1;
if (pthread_create(&t0, NULL, &odd, s0)!=0){
return 0;
}
if (pthread_create(&t1, NULL, &even, s1)!=0){
return 00;
}
if(pthread_join(t0, (void**)&res0)!=0){
return 1;
}
if(pthread_join(t1, (void**)&res1)!=0){
return 11;
}
if(*res0 > *res1){
printf("%d\n", *res0);
}else{
printf("%d\n", *res1);
}
free(s0);
free(s1);
return 0;
}

Changing irrelevant part of the function changes papi measurement of branch prediction

I am playing with the codes that I found online and I want to try different branch prediction codes to have a better understanding of branch predictors.
CPU is AMD Ryzen 3600.
Basically, what I am doing is in the code below, I am trying to measure a misprediction rate of a given function/code segment. As a pseudo/shortened code, here is what I do:
int measurement(int length, int arr){
r1 = papi_read();
for(;i<len;){
if(arr[j]){
do_sth();
}
}
r2 = papi_read();
return r2-r1;
}
void warmup(){
for(volatile int i = 0; i< 10000; i++){
for(volatile int j = 0; j < 100; j++){} // important line
}
}
int main() {
init_papi();
init_others(); //creates arrays, initialize them, etc.
warmup();
for(int i = 0; i < 20; i++){
results[i] = measurement(128, array);
usleep(1200); // 2nd important line
}
print_mispredictions();
}
My setup
I have isolated the core I am working on so that there is no other user process in that core. I have also isolated the sibling one so that I am full in charge of both of the cores, except there is an interrupt or a routine.
Previously, I have seen that if I use sleep between iterations (as in main function), the CPU enters a deeper level C-state so the branch prediction units (BHT in this case) resets. This is the explanation of 2nd important line in the code.
What I want to see
Without the sleep line I am seeing that in each iteration I am having lower and lower misprediction rates. That is because of the branch predictors are learning the pattern in the array.
With the sleep line, what I want to achieve is, at each iteration, I should see similar misprediction numbers, as BPU entries are being reset.
What is the problem
The problem is when I change the warmup line from
void warmup(){
for(volatile int i = 0; i< 10000; i++){
for(volatile int j = 0; j < 100; j++){}
}
}
to
void warmup(){
for(volatile int i = 0; i< 1000000; i++){ // notice that I have the
// same amount of iteration
}
}
then the measurements are messed up. I have experienced this kind of issue in my previous question, which is never ever answered. Changing a line that is irrelevant to the measurement changes the measurement behavior.
This is my results:
# With 1 for loop, expected behavior. At every iteration, it is reset to ~ 60
# For 128 if statement, 60 misprediction is 50% guessing.
$ ./exp
0:73 #iteration count, misprediction count
1:62
2:63
3:21
4:63
...
# With 2 for loops. Unexpected behavior. It should always reset to ~ 60 but it keeps decreasing.
./exp
0:66
1:18
2:4
3:4
4:1
5:0
6:0
...
Putting a mfence or lfence instruction after the warmup doesn't change the result either.
Below, I am putting the whole code in case someone wants to try and/or has an answer for this behavior.
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/sysinfo.h>
#include <time.h>
#include "papi.h"
#define stick_this_thread_to_core(retval, core_id){ \
int num_cores = sysconf(_SC_NPROCESSORS_ONLN); \
if (core_id < 0 || core_id >= num_cores) \
retval = EINVAL; \
cpu_set_t cpuset; \
CPU_ZERO(&cpuset); \
CPU_SET(core_id, &cpuset); \
retval = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);\
}
#define ERROR_RETURN(retval) { \
fprintf(stderr, "Error %d, (%s) %s:line %d: \n", retval,PAPI_strerror(retval), __FILE__,__LINE__); \
exit(retval); \
}
void papi_my_native_add_event(int* EventSet1, char* eventname, int *native){
int retval;
//printf("native add\n");
if((retval = PAPI_event_name_to_code(eventname, native)) != PAPI_OK)
ERROR_RETURN(retval);
//printf("native add to_code is successful\n");
if ((retval = PAPI_add_event(*EventSet1, *native)) != PAPI_OK)
ERROR_RETURN(retval);
//printf("native add add_event is successful\n");
int number = 0;
if((retval = PAPI_list_events(*EventSet1, NULL, &number)) != PAPI_OK)
ERROR_RETURN(retval);
//fprintf(stderr, "Added %d events.\n", number);
}
void papi_my_native_add_start_event(int* EventSet1, char* eventname, int *native){
papi_my_native_add_event(EventSet1, eventname, native);
int retval = 0;
if((retval = PAPI_start(*EventSet1)) != PAPI_OK)
ERROR_RETURN(retval);
//printf("START %s\n", eventname);
}
int RNG_SIZE = 128;
uint64_t* rng_arr;
uint64_t dummy;
// 12th core
int cpuid = 11;
// Code from
// https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/tree/master/2019/11/05
// acts like a random number generator, but it is deterministic.
static inline uint64_t rng(uint64_t h) {
h ^= h >> 33;
h *= UINT64_C(0xff51afd7ed558ccd);
h ^= h >> 33;
h *= UINT64_C(0xc4ceb9fe1a85ec53);
h ^= h >> 33;
return h;
}
uint64_t measurement(int* EventSet, uint64_t howmany, uint64_t* arr){
long long reads[2] = {0};
PAPI_read(*EventSet, &reads[0]);
for(int j = 0; j < howmany; j++){
if(arr[j]){
dummy &= arr[j];
}
}
PAPI_read(*EventSet, &reads[1]);
return (reads[1] - reads[0]);
}
void precompute_rng(){
int howmany = RNG_SIZE;
for(int i = 0; i < RNG_SIZE; i++){
rng_arr[i] = rng(howmany) &0x1;
howmany--;
}
}
int stick_to_core(){
int retval = 0;
stick_this_thread_to_core(retval, cpuid);
if(retval){
printf("Affinity error: %s\n", strerror(errno));
return 1;
}
return 0;
}
void init_papi(int* EventSet, int cpuid){
int retval = 0;
// papi init
if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT )
ERROR_RETURN(retval);
PAPI_option_t opts1;
opts1.cpu.cpu_num = cpuid;
if((retval = PAPI_create_eventset(EventSet)) != PAPI_OK)
ERROR_RETURN(retval);
if((retval = PAPI_assign_eventset_component(*EventSet, 0)) != PAPI_OK)
ERROR_RETURN(retval);
opts1.cpu.eventset = *EventSet;
if((retval =PAPI_set_opt(PAPI_CPU_ATTACH, &opts1)) != PAPI_OK)
ERROR_RETURN(retval);
char* eventname = "RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED";
unsigned int native = 0x0;
papi_my_native_add_start_event(EventSet, eventname, &native);
}
void warmup(){
for(volatile int i = 0; i< 100000; i++){
for(volatile int j = 0; j < 100; j++){} // important line
}
}
int main() {
if(stick_to_core()){
printf("Error on sticking to the core\n");
return 1;
}
int EventSet = PAPI_NULL;
int* EventSetPtr = &EventSet;
init_papi(EventSetPtr, cpuid);
rng_arr = (uint64_t*) malloc(RNG_SIZE * sizeof(uint64_t));
precompute_rng(cpuid);
int iter = 4096;
uint64_t* results = (uint64_t*) malloc(iter * sizeof(uint64_t));
for(int i = 0; i < iter; i++)
results[i] = 0;
warmup();
for(int i = 0; i < 20; i++){
results[i] = measurement(&EventSet, RNG_SIZE, rng_arr);
usleep(1200);
}
// prints
for(int i = 0; i < 20; i++){
printf("%d:%ld\n", i, results[i]);
}
printf("\n");
free(results);
return 0;
}
Compile with
gcc -O0 main.c -lpthread -lpapi -o exp

Difference in behavior between clang and gcc?

I'm writing a C function to simulate a cache given an address trace. The function works as expected when compiled on my mac using gcc (really clang). gcc --version on my mac returns this:
Configured with: --prefix=/Applications/Xcode.app/Contents/Developer/usr --with-gxx-include-dir=/usr/include/c++/4.2.1
Apple LLVM version 8.1.0 (clang-802.0.42)
When I compile the same program on linux using gcc, the returns are way off, and eC & hC in my program (cache eviction counter and hit counter) are in the hundreds of thousands, when they should be below 10. When typing gcc --version on the linux machine, it returns this:
gcc (Ubuntu 4.9.3-8ubuntu2~14.04) 4.9.3
Here is the program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#include <getopt.h>
#include "cachelab.h"
typedef struct{
int v;
int t;
int LRU;
} block;
typedef struct{
block *blocks;
} set;
typedef struct{
set *sets;
} cache;
void simulate(int s, int E, int b, char* file, int* hC, int* mC, int* eC)
{
int numSets = (1 << s);
char operation;
int address;
int size;
int curTag;
int curSet;
int maxLRU = 0;
int curLRU = 0;
int check = 0;
cache c;
set *sets = malloc(sizeof(set) * numSets);
c.sets = sets;
int i = 0;
while(i < numSets)
{
c.sets[i].blocks = malloc(sizeof(block) * E);
for (int j = 0; j < E; j++)
{
c.sets[i].blocks[j].v = 0;
c.sets[i].blocks[j].t = INT_MIN;
c.sets[i].blocks[j].LRU = 0;
}
i++;
}
FILE *f = fopen(file, "r");
while(fscanf(f," %c %x,%d", &operation, &address, &size) != EOF)
{
check = 0;
curTag = ((unsigned int) address) >> (s+b);
curSet = (address >> b) & ((1 << s) - 1);
for (int i = 0; i < E; i++)
{
c.sets[curSet].blocks[i].LRU++;
if(c.sets[curSet].blocks[i].LRU >= maxLRU)
{
maxLRU = c.sets[curSet].blocks[i].LRU;
curLRU = i;
}
if(curTag == c.sets[curSet].blocks[i].t)
{
*hC = *hC + 1;
if (operation == 'M')
{
*hC = *hC + 1;
}
c.sets[curSet].blocks[i].LRU = 0;
check = 1;
}
}
if(check == 0)
{
for(int i = 0; i < E; i++)
{
if(c.sets[curSet].blocks[i].v == 0)
{
*mC = *mC + 1;
if (operation == 'M')
{
*hC = *hC + 1;
}
c.sets[curSet].blocks[i].v = 1;
c.sets[curSet].blocks[i].LRU = 0;
c.sets[curSet].blocks[i].t = curTag;
check = 1;
break;
}
}
}
if(check == 0)
{
*eC = *eC + 1;
*mC = *mC + 1;
if (operation == 'M')
{
*hC = *hC + 1;
}
c.sets[curSet].blocks[curLRU].t = curTag;
c.sets[curSet].blocks[curLRU].v = 1;
c.sets[curSet].blocks[curLRU].LRU = 0;
}
}
}
int main(int argc, char** argv)
{
int hitCount, missCount, evictionCount;
int s, E, b;
char *file;
char opt;
while((opt = getopt(argc,argv,"v:h:s:E:b:t:")) != -1)
{
switch(opt){
case 'v':
break;
case 'h':
break;
case 's':
s = atoi(optarg);
break;
case 'E':
E = atoi(optarg);
break;
case 'b':
b = atoi(optarg);
break;
case 't':
file = optarg;
break;
default:
exit(1);
}
}
simulate(s, E, b, file, &hitCount, &missCount, &evictionCount);
printSummary(hitCount, missCount, evictionCount);
return 0;
}
EDIT:
I understand that this is due to a difference between clang and gcc. Does anyone have any information about how I can go about fixing this discrepancy?
Here is cachelab.c:
/*
* cachelab.c - Cache Lab helper functions
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "cachelab.h"
#include <time.h>
trans_func_t func_list[MAX_TRANS_FUNCS];
int func_counter = 0;
/*
* printSummary - Summarize the cache simulation statistics. Student cache simulators
* must call this function in order to be properly autograded.
*/
void printSummary(int hits, int misses, int evictions)
{
printf("hits:%d misses:%d evictions:%d\n", hits, misses, evictions);
FILE* output_fp = fopen(".csim_results", "w");
assert(output_fp);
fprintf(output_fp, "%d %d %d\n", hits, misses, evictions);
fclose(output_fp);
}
/*
* initMatrix - Initialize the given matrix
*/
void initMatrix(int M, int N, int A[N][M], int B[M][N])
{
int i, j;
srand(time(NULL));
for (i = 0; i < N; i++){
for (j = 0; j < M; j++){
// A[i][j] = i+j; /* The matrix created this way is symmetric */
A[i][j]=rand();
B[j][i]=rand();
}
}
}
void randMatrix(int M, int N, int A[N][M]) {
int i, j;
srand(time(NULL));
for (i = 0; i < N; i++){
for (j = 0; j < M; j++){
// A[i][j] = i+j; /* The matrix created this way is symmetric */
A[i][j]=rand();
}
}
}
/*
* correctTrans - baseline transpose function used to evaluate correctness
*/
void correctTrans(int M, int N, int A[N][M], int B[M][N])
{
int i, j, tmp;
for (i = 0; i < N; i++){
for (j = 0; j < M; j++){
tmp = A[i][j];
B[j][i] = tmp;
}
}
}
/*
* registerTransFunction - Add the given trans function into your list
* of functions to be tested
*/
void registerTransFunction(void (*trans)(int M, int N, int[N][M], int[M][N]),
char* desc)
{
func_list[func_counter].func_ptr = trans;
func_list[func_counter].description = desc;
func_list[func_counter].correct = 0;
func_list[func_counter].num_hits = 0;
func_list[func_counter].num_misses = 0;
func_list[func_counter].num_evictions =0;
func_counter++;
}
You forgot to initialize the counters and flags so they start at undefined values. The following lines:
int hitCount, missCount, evictionCount;
int s, E, b;
should be:
int hitCount = 0, missCount = 0, evictionCount = 0;
int s = 0, E = 0, b = 0;
It just happens that the initial values happen to be lower on the mac so you're not getting correct results on the mac either (at least not guaranteed since the initial value is undefined).

Dynamic 2D Array with realloc gives segmentation fault, but works with malloc

I have a problem with my dynamic 2d array.
With malloc it worked. With realloc, it failed.
This dosen't work:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *const *argv) {
unsigned ** gmatrix = NULL;
int cap = 4;
/*
...
*/
gmatrix = realloc(gmatrix, 4 * sizeof(unsigned*));
for(unsigned i = 0; i < cap; i++) {
gmatrix[i] = realloc(gmatrix, cap* sizeof(unsigned));
}
// initialize:
for(unsigned i = 0; i < cap; i++) {
for(unsigned j = 0; j < cap; j++) {
gmatrix[i][j] = 0;
}
}
}
But this does:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *const *argv) {
unsigned ** gmatrix = NULL;
int cap = 4;
/*
...
*/
gmatrix = malloc(cap * sizeof(unsigned*));
for(unsigned i = 0; i < cap; i++) {
gmatrix[i] = malloc(cap* sizeof(unsigned));
}
for(unsigned i = 0; i < cap; i++) {
for(unsigned j = 0; j < cap; j++) {
gmatrix[i][j] = 0;
}
}
}
In the first code part I get a segmentation fault error. Why?
gmatrix[i] = realloc(gmatrix, cap* sizeof(unsigned));
should be
gmatrix[i] = realloc(gmatrix[i], cap* sizeof(unsigned));
Using gmatrix instead of gmatrix[i] will lead to Undefined Behavior and the segmentation fault which you experience is one of the side-effects of Undefined Behavior.
Edit:
You should initialize gmatrix[i] to NULL after the first malloc as #MattMcNabb pointed out. So use the following after the first call to realloc:
for(unsigned i = 0; i < cap; i++) {
gmatrix[i] = NULL;
gmatrix[i] = realloc(gmatrix[i], cap* sizeof(unsigned));
}

Resources