I have a test program:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <mqueue.h>
#include <errno.h>
#include <fcntl.h>
/*
 * Creates a POSIX message queue, unlinks its name, then forks: the parent
 * sends one int through the descriptor it opened and the child receives it
 * through the same descriptor and prints it.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if mq_open(), fork(),
 * mq_send() or mq_receive() fails.
 */
int main(void) {
    /* Designated initializers zero the members that are not named, so
       mq_flags and mq_curmsgs are not left indeterminate. */
    struct mq_attr attrs = {
        .mq_maxmsg = 10,
        .mq_msgsize = sizeof(int),
    };
    const char name[] = "/test-queue";

    mqd_t q = mq_open(name, O_CREAT | O_RDWR, 0600, &attrs);
    if (q == (mqd_t)-1) {
        perror("mq_open");
        exit(EXIT_FAILURE);
    }
    mq_unlink(name); // it doesn't matter if I do this at the end or not

    /* fork() returns -1 on failure; without this check the failure case
       would silently run the parent branch with no receiver. */
    pid_t pid = fork();
    if (pid == -1) {
        perror("fork");
        mq_close(q);
        exit(EXIT_FAILURE);
    }

    if (pid != 0) {
        /* Parent: send a single message with priority 1. */
        int msg = 666;
        if (mq_send(q, (const char *)&msg, sizeof(msg), 1)) {
            perror("mq_send");
            exit(EXIT_FAILURE);
        }
    } else {
        /* Child: block until the message arrives, then print it. */
        int msg;
        unsigned priority;
        if (mq_receive(q, (char *)&msg, sizeof(msg), &priority) == -1) {
            perror("mq_receive");
            exit(EXIT_FAILURE);
        }
        printf("%d\n", msg);
    }

    mq_close(q);
    return 0;
}
I compile this program using gcc -std=c99 -Wall -o mqtest mqtest.c -lrt on two platforms:
Linux kallikanzarid-desktop 3.8.0-31-generic #46-Ubuntu SMP Tue Sep 10 20:03:44 UTC 2013 x86_64 x86_64 x86_64 GNU/Linux
FreeBSD bsd.localhost 9.2-RELEASE FreeBSD 9.2-RELEASE #0 r255898: Thu Sep 26 22:50:31 UTC 2013 root@bake.isc.freebsd.org:/usr/obj/usr/src/sys/GENERIC amd64
On Linux, everything works. On FreeBSD, I get mq_receive: Bad file descriptor. Moving the mq_unlink call to the end of main() doesn't help. Is there a way to fix this, or do I have to postpone marking the queue for deletion and reopen it after the fork?
FreeBSD does preserve message queue descriptors. See mq_open(2):
FreeBSD implements message queue based on file descriptor. The descriptor is inherited by child after fork(2). The descriptor is closed in a new image after exec(3). The select(2) and kevent(2) system calls are supported for message queue descriptor.
Edit:
The structure that mqd_t points to does contain a descriptor. But if you test that file descriptor just after the fork() using fcntl(), it also returns EBADF.
This is a bug in FreeBSD. But whether the bug is in the docs or in the implementation I cannot say.
Related
I was having a problem with one application, so i went back to the basics and grabbed the sem_timedwait example from the ubuntu focal online manpages. I modified it slightly to repro the problem.
CASE: sem_post before sem_timedwait
EXPECTED: sem_timedwait to succeed immediately
OBTAINED: sem_timedwait times out
The problem was showing initially on a Docker (WSL disabled) container with Ubuntu 20.04 (g++ 9 multilib)
I then tried from a WSL Debian 9 (g++ 6 multilib) and a WSL Ubuntu 20.04 (g++ 9 multilib) installed fresh from PowerShell
I further installed a full fresh Ubuntu 20.04 VM with g++ 9 multilib on Hyper-V
I also tried apt update && apt upgrade to be sure to be on the latest packages, I also tried at some point to completely remove g++ 9 and all its dependencies and use g++ 10 (which comes with libasan.so.6 instead of libasan.so.5)
Original sem_timedwait example from Ubuntu
Modified version, added a sleep before sem_timedwait so that the call to sem_timedwait happens always after the sem_post. I also added a print of sem_getvalue to verify that the semaphore counter was being incremented correctly to 1.
[File: test_sem.cpp]
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <semaphore.h>
#include <time.h>
#include <assert.h>
#include <errno.h>
#include <signal.h>
/* Semaphore posted by the signal handler and waited on by main();
   initialised in main() with sem_init(&sem, 0, 0). */
sem_t sem;
/* Print msg together with the current errno text via perror(), then
   terminate the process with EXIT_FAILURE. Wrapped in do/while(0) so it
   behaves as a single statement. */
#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)
/*
 * Signal handler installed for SIGALRM by main(): writes a notice to
 * stdout and posts the global semaphore. If sem_post() fails it writes a
 * diagnostic to stderr and terminates with _exit(EXIT_FAILURE).
 * The sig argument is not used.
 */
static void
handler(int sig)
{
    static const char notice[] = "sem_post() from handler\n";
    static const char failure[] = "sem_post() failed\n";

    /* sizeof - 1 drops the terminating NUL, giving the same byte counts
       (24 and 18) as a hand-counted length. */
    write(STDOUT_FILENO, notice, sizeof notice - 1);

    if (sem_post(&sem) != -1)
        return;

    write(STDERR_FILENO, failure, sizeof failure - 1);
    _exit(EXIT_FAILURE);
}
/*
 * Reproduction driver: arms an alarm whose handler posts the semaphore,
 * sleeps until after the post has happened, then calls sem_timedwait()
 * and reports whether it succeeded or timed out.
 *
 * argv[1] = seconds until SIGALRM, argv[2] = seconds added to the current
 * time to form the sem_timedwait() deadline.
 * Exits with EXIT_SUCCESS if sem_timedwait() returned 0, else EXIT_FAILURE.
 */
int
main(int argc, char *argv[])
{
struct sigaction sa;
struct timespec ts;
int s;
if (argc != 3) {
fprintf(stderr, "Usage: %s <alarm-secs> <wait-secs>\n",
argv[0]);
exit(EXIT_FAILURE);
}
/* Second argument 0: the semaphore is not shared between processes;
   third argument 0: initial value, so a wait blocks until a post. */
if (sem_init(&sem, 0, 0) == -1)
handle_error("sem_init");
/* Establish SIGALRM handler; set alarm timer using argv[1] */
sa.sa_handler = handler;
sigemptyset(&sa.sa_mask);
sa.sa_flags = 0;
if (sigaction(SIGALRM, &sa, NULL) == -1)
handle_error("sigaction");
alarm(atoi(argv[1]));
/* Compute the absolute CLOCK_REALTIME deadline for sem_timedwait():
   current time plus the number of seconds given in argv[2].
   The deadline is fixed here, before the sleeps below, so the time
   spent sleeping counts against it. */
if (clock_gettime(CLOCK_REALTIME, &ts) == -1)
handle_error("clock_gettime");
ts.tv_sec += atoi(argv[2]);
/* Sleep past the alarm so the handler's sem_post() happens before
   sem_timedwait() is called.
   NOTE(review): sleep() presumably returns early once the SIGALRM
   handler has run -- confirm; the second sleep below adds margin. */
sleep(atoi(argv[1]) + 2);
/* Print the semaphore count to show the post was recorded. */
int value = 0;
sem_getvalue(&sem, &value);
printf("sem_getvalue(): %d\n", value);
sleep(2);
printf("main() about to call sem_timedwait()\n");
while ((s = sem_timedwait(&sem, &ts)) == -1 && errno == EINTR)
continue; /* Restart if interrupted by handler */
/* Check what happened */
if (s == -1) {
if (errno == ETIMEDOUT)
printf("sem_timedwait() timed out\n");
else
perror("sem_timedwait");
} else
printf("sem_timedwait() succeeded\n");
exit((s == 0) ? EXIT_SUCCESS : EXIT_FAILURE);
}
to compile this example i used the following
g++ -std=gnu++17 -m32 -fsanitize=address -fsanitize-recover=address -fsanitize-address-use-after-scope -fno-omit-frame-pointer test_sem.cpp -lstdc++ -lpthread -lasan
to run it, simply ./a.out 2 5
what I obtain is the following unexpected result:
sem_post() from handler
sem_getvalue(): 1
main() about to call sem_timedwait()
sem_timedwait() timed out
the same code compiled WITHOUT the -m32 flag g++ -std=gnu++17 -fsanitize=address -fsanitize-recover=address -fsanitize-address-use-after-scope -fno-omit-frame-pointer test_sem.cpp -lstdc++ -lpthread -lasan gives me the following expected result
sem_post() from handler
sem_getvalue(): 1
main() about to call sem_timedwait()
sem_timedwait() succeeded
the same code compiled WITH the -m32 flag but WITHOUT the libasan g++ -std=gnu++17 -m32 test_sem.cpp -lstdc++ -lpthread -lasan gives me the following expected result:
sem_post() from handler
sem_getvalue(): 1
main() about to call sem_timedwait()
sem_timedwait() succeeded
Just for the sake of me I tried also to replace the signal handler code with a second thread to achieve the same sem_post before sem_timedwait and I obtain the same exact result. I further tried also using the non-POSIX-compliant sem_clockwait using both CLOCK_REALTIME and CLOCK_MONOTONIC and I got the same exact result.
I also tried completely removing g++ 9 and installed g++ 10 (which uses libasan.so.6 instead of libasan.so.5)
Right now I don't know if it is something on my side, but Docker Ubuntu 20.04 (no WSL), Debian 9 on WSL 2, Ubuntu 20.04 on WSL 2, and a full Hyper-V virtual machine with Ubuntu 20.04 are all giving me the same result.
I tried everything I could think of to no avail.
Think of this as a continuation of the good advice here:
https://stackoverflow.com/a/56780616/16739703
except that I am hoping not to modify the child process.
Edit: I have written code which minimises to:
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Enables O_ASYNC on standard input, makes this process the owner that
 * receives the resulting signals (F_SETOWN), and then replaces the process
 * image with the program named by argv[1], passing it the remaining
 * arguments and the unchanged environment.
 *
 * Only returns (with status 1) on failure: a usage error, a failing
 * fcntl() call, or execve() returning.
 */
int main(int argc, char *argv[], char *envp[]) {
    /* Without a program name argv[1] is NULL and execve() could only fail. */
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <program> [args...]\n",
                argc > 0 ? argv[0] : "morehup");
        exit(1);
    }

    /* F_GETFL reports failure as -1; OR-ing O_ASYNC into -1 would ask
       F_SETFL to set every flag bit, so check before using the value. */
    int init_flags = fcntl(0, F_GETFL, 0);
    if (init_flags == -1) {
        perror("fcntl...F_GETFL");
        exit(1);
    }
    if (fcntl(0, F_SETFL, init_flags | O_ASYNC) == -1) {
        perror("fcntl...F_SET_FL....O_ASYNC");
        exit(1);
    }
    if (fcntl(0, F_SETOWN, getpid()) == -1) {
        perror("fcntl...F_SETOWN...)");
        exit(1);
    }

    /* execve() returns only when it has failed. */
    execve(argv[1], argv + 1, envp);
    perror("execve");
    return 1;
}
and this makefile:
all: morehup
CFLAGS=-g -D_GNU_SOURCE
LDFLAGS=-g
so that, with this procedure:
parent> export TMPDIR="$(mktemp -d)"
parent> mkfifo $TMPDIR/fifo
parent> sh
# you get a new shell, probably with a different prompt
parent> exec 7<>$TMPDIR/fifo
# must be both input and output, or the process stalls
child> TMPDIR=... # as other shell
child> ./morehup <$TMPDIR/fifo /bin/sh -c "while true; do date; sleep 5; done"
# you get a list of dates
parent> exit
child> I/O possible # followed by a prompt, with no more dates
the kernel will kill the child when the parent exits.
The more configurable version is here:
https://github.com/JamesC1/morehup/blob/main/morehup.c
I have two questions:
What are the chances of adding modest amounts of code, so that this will mostly work for most of the common *nix?
Is there a posix utility that already does something like this? ie am I reinventing the wheel, and if so, what is it called?
My test program is calling stat(2) to obtain a device the file resides on.
stat.c (built with cc stat.c -o stat)
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/sysmacros.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
/*
 * Calls stat(2) on a fixed path and prints the major and minor numbers of
 * the device ID it reports (st_dev) to stderr.
 * Returns 0 on success, 1 if stat() fails.
 */
int main()
{
    char *path = "/home/smoku/test.txt";
    struct stat info;

    if (stat(path, &info) < 0) {
        fprintf(stderr, "Error getting stat for '%s': %d %s\n", path, errno, strerror(errno));
        return 1;
    }

    /* Split the combined device number into its two components. */
    unsigned int dev_major = major(info.st_dev);
    unsigned int dev_minor = minor(info.st_dev);

    fprintf(stderr, "Found '%s' => %u:%u\n", path, dev_major, dev_minor);
    return 0;
}
Got 0:44
$ ls -l /home/smoku/test.txt
-rw-r--r-- 1 smoku smoku 306 08-30 09:33 /home/smoku/test.txt
$ ./stat
Found '/home/smoku/test.txt' => 0:44
$ /usr/bin/stat -c "%d" /home/smoku/test.txt
44
But... there is no such device in my system and /home is 0:35
$ grep /home /proc/self/mountinfo
75 59 0:35 /home /home rw,relatime shared:30 - btrfs /dev/bcache0 rw,ssd,space_cache,subvolid=258,subvol=/home
Why do I get a device ID that does not exist in my system?
stat(2) in fs/stat.c uses inode->i_sb->s_dev to fill stat.st_dev
/proc/self/mountinfo in fs/proc_namespace.c uses mnt->mnt_sb->s_dev
Apparently struct inode.i_sb superblock may be different to struct vfsmount.mnt_sb superblock in case of mount of btrfs subvolume.
This is an issue inherent to the btrfs implementation, which "requires non-trivial changes in the VFS layer" to fix: https://mail-archive.com/linux-btrfs@vger.kernel.org/msg57667.html
I am experiencing a strange problem with the popen and fgets library functions on a Linux system.
A short program demonstrating the problem is below that:
Installs a signal handler for SIGUSR1.
Creates a secondary thread to repeatedly send SIGUSR1 to the main thread.
In the main thread, repeatedly executes a very simple shell command via popen(), gets the output via fgets(), and checks to see if the output is of the expected length.
The output is unexpectedly truncated intermittently. Why?
Command-line invocation example:
$ gcc -Wall test.c -lpthread && ./a.out
iteration 0
iteration 1
iteration 2
iteration 3
iteration 4
iteration 5
unexpected length: 0
Details of my machine (the program will also compile and run with this online C compiler):
$ cat /etc/redhat-release
CentOS release 6.5 (Final)
$ uname -a
Linux localhost.localdomain 2.6.32-431.17.1.el6.x86_64 #1 SMP Wed May 7 23:32:49 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux
# gcc 4.4.7
$ gcc --version
gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-4)
Copyright (C) 2010 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# glibc 2.12
$ ldd --version
ldd (GNU libc) 2.12
Copyright (C) 2010 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
Written by Roland McGrath and Ulrich Drepper.
The program:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <pthread.h>
#include <errno.h>
/* Empty handler installed for SIGUSR1. */
void dummy_signal_handler(int signal);
/* Thread routine that sends SIGUSR1 to the main thread in an endless loop. */
void* signal_spam_task(void* arg);
/* Runs one popen()/fgets() round and exits the process if the output
   length is not the expected one. */
void echo_and_verify_output();
/* fgets() wrapper that retries when the read fails with EINTR. */
char* fgets_with_retry(char *buffer, int size, FILE *stream);
/* Thread ID of the main thread; assigned in main() and used as the
   pthread_kill() target by the signalling thread. */
static pthread_t main_thread;
/**
 * Stress test: repeatedly runs a trivial shell command through popen() and
 * reads its output while a second thread floods the main thread with
 * SIGUSR1.
 *
 * echo_and_verify_output() prints an error message and exits if the output
 * does not have the expected length. The original report observed such
 * truncation about 5% of the time, and noted that installing the signal
 * handler with the SA_RESTART flag, blocking SIGUSR1 during the call to
 * fgets(), or sleeping for a few milliseconds after the call to popen()
 * completely prevented it.
 *
 * Exits with status 1 if the handler or the signalling thread cannot be
 * set up; otherwise loops until echo_and_verify_output() exits.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    // install signal handler for SIGUSR1 (deliberately without SA_RESTART)
    struct sigaction sa;
    sa.sa_handler = dummy_signal_handler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = 0;
    if (sigaction(SIGUSR1, &sa, NULL) == -1) {
        perror("sigaction");
        exit(1);
    }

    // create a secondary thread to repeatedly send SIGUSR1 to main thread
    main_thread = pthread_self();
    pthread_t spam_thread;
    // pthread_create() returns an error number instead of setting errno
    int rc = pthread_create(&spam_thread, NULL, signal_spam_task, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(rc));
        exit(1);
    }

    // repeatedly execute simple shell command until output is unexpected
    unsigned int i = 0;
    for (;;) {
        printf("iteration %u\n", i++);
        echo_and_verify_output();
    }
    return 0;
}
/* Intentionally empty signal handler: it performs no work and simply
   returns. The signal number is ignored. */
void dummy_signal_handler(int signal)
{
    (void)signal;
}
/*
 * Thread entry point: sends SIGUSR1 to main_thread over and over.
 * arg is unused. The loop has no exit, so the trailing return only
 * satisfies the pthread start-routine signature.
 */
void *signal_spam_task(void *arg)
{
    (void)arg;
    while (1) {
        pthread_kill(main_thread, SIGUSR1);
    }
    return NULL;
}
/*
 * Runs "echo -n hello" through popen(), reads everything it prints, and
 * checks that exactly strlen("hello") characters were received.
 *
 * Exits with status 1 if popen() fails, the stream reports an error, or
 * pclose() returns non-zero; exits with status 2 (after printing the
 * observed length) if the output length is wrong. Returns normally only
 * when the output had the expected length.
 */
void echo_and_verify_output() {
    // run simple command
    FILE *stream = popen("echo -n hello", "r");
    if (!stream)
        exit(1);

    // count the number of characters in the output; size_t matches what
    // strlen() returns, so nothing is narrowed
    size_t length = 0;
    char buffer[BUFSIZ];
    while (fgets_with_retry(buffer, BUFSIZ, stream) != NULL)
        length += strlen(buffer);
    if (ferror(stream) || pclose(stream))
        exit(1);

    // double-check the output
    if (length != strlen("hello")) {
        // %zu is the conversion for size_t (the old %i expected a signed int)
        printf("unexpected length: %zu\n", length);
        exit(2);
    }
}
// version of fgets() that retries on EINTR
//
// Reads at most size-1 characters from stream into buffer, stopping after a
// newline or at end-of-file, and NUL-terminates the result.
//
// Characters are fetched one at a time with getc() rather than with fgets():
// when fgets() fails part-way through, the characters it already consumed
// are not reliably available to the caller, so clearing the error and
// calling fgets() again can silently drop data. With getc() every character
// is stored the moment it is returned, so an interrupted read only needs to
// be repeated.
//
// Returns buffer on success, NULL if end-of-file is reached before any
// character was read or if size <= 0. Calls exit(1) on any read error other
// than EINTR.
char* fgets_with_retry(char *buffer, int size, FILE *stream) {
    if (size <= 0)
        return NULL;

    int used = 0;
    int hit_eof = 0;
    while (used < size - 1) {
        errno = 0;
        int c = getc(stream);
        if (c == EOF) {
            if (!ferror(stream)) {
                hit_eof = 1;        // genuine end-of-file
                break;
            }
            if (errno != EINTR)
                exit(1);
            clearerr(stream);       // interrupted: nothing was consumed, retry
            continue;
        }
        buffer[used++] = (char)c;
        if (c == '\n')
            break;
    }

    if (used == 0 && hit_eof)
        return NULL;
    buffer[used] = '\0';
    return buffer;
}
If an error occurs on a FILE stream while reading with fgets, the contents of the buffer are indeterminate when fgets returns NULL, so you cannot tell whether some of the bytes already read were transferred to it (7.19.7.2 of the C99 spec). So if the SIGUSR1 signal arrives during the fgets call and causes an EINTR, it is possible that some characters are lost from the stream.
The upshot is that you can't use stdio functions to read/write FILE objects if the underlying system calls might have recoverable error returns (such as EINTR or EAGAIN), as there's no guarantee the standard library won't lose some data from the buffer when that happens. You can claim that this is a "bug" in the standard library implementation, but it is a bug that the C standard allows.
I am writing a multi-process program using named semaphores, in the master process I am opening the semaphore with the following code
/* Create (or open) the named semaphore with an initial value of 1.
   POSIX defines only O_CREAT and O_EXCL for sem_open(); any other flag has
   unspecified effect, so O_RDWR and O_TRUNC are not passed.
   NOTE(review): on some implementations O_TRUNC may truncate the object
   backing an already-existing semaphore -- a plausible source of a SIGBUS
   in processes that have it open; confirm on the target system. */
semaphore = sem_open("/msema", O_CREAT, 00777, 1);
if (semaphore == SEM_FAILED)
    perror("SEMAPHORE");
and in the child program
/* Open the named semaphore "/msema"; O_CREAT is not passed, so it is not
   created here. */
count_sem=sem_open("/msema",O_RDWR);
/* sem_open() signals failure by returning SEM_FAILED; report it and bail
   out with status 1. */
if(count_sem==SEM_FAILED)
{
perror("sem_open");
return 1;
}
on sem_wait()
/* Wait on the semaphore, retrying only when sem_wait() itself failed with
   EINTR. errno is captured immediately after the call because the printf()
   calls that follow are allowed to overwrite it. */
int wait_errno;
do {
    printf("BeforeSemWait\n");
    errno = 0;
    rtn = sem_wait(count_sem);
    wait_errno = errno;
    printf("afterSemWait\n");
} while (rtn == -1 && wait_errno == EINTR);
if (rtn < 0) {
    printf("Error\n");
    errno = wait_errno;     /* make perror() describe the sem_wait() failure */
    perror("sem_wait()");
    sem_close(count_sem);
    return 1;
}
I am getting a bus error from sem_wait()
BeforeSemWait
Program received signal SIGBUS, Bus error.
0x00a206c9 in sem_wait@@GLIBC_2.1 () from /lib/libpthread.so.0
What am I doing wrong?
edit: entire code:
master.c: http://pastebin.com/3MnMjUUM
worker.c http://pastebin.com/rW5qYFqg
You must have somewhere else a bug in your program. The following works here (O_TRUNC is not needed):
semproducer.c:
#include <fcntl.h>
#include <semaphore.h>
#include <stdio.h>
#include <unistd.h>
/*
 * Producer: creates (or opens) the named semaphore "/msema" with an initial
 * value of 1 and then posts it every five seconds, printing a line after
 * each post.
 *
 * Returns 1 if sem_open() or sem_post() fails; otherwise runs until killed.
 */
int main (void) {
    /* Only O_CREAT and O_EXCL are defined for sem_open(); O_RDWR and
       O_TRUNC are not needed. */
    sem_t *sem = sem_open("/msema", O_CREAT, 00777, 1);
    if (sem == SEM_FAILED) {
        perror("sem_open");
        return 1;
    }

    for (;;) {
        if (sem_post(sem) == -1) {
            perror("sem_post");
            sem_close(sem);
            return 1;
        }
        printf ("sem_post done\n");
        sleep (5);
    }
}
semconsumer.c:
#include <fcntl.h>
#include <stdio.h>
#include <semaphore.h>
#include <errno.h>
/*
 * Consumer: opens the existing named semaphore "/msema" and waits on it in
 * an endless loop, printing a line each time a wait succeeds.
 *
 * Returns 1 if sem_open() fails or sem_wait() fails for a reason other
 * than EINTR; otherwise runs until killed.
 */
int main (void) {
    /* oflag 0 opens an existing semaphore; sem_open() defines no
       access-mode flags, so O_RDWR is not passed. */
    sem_t *count_sem = sem_open("/msema", 0);
    if (count_sem == SEM_FAILED) {
        perror("sem_open");
        return 1;
    }

    for (;;) {
        int rtn;
        /* Retry only when the call itself reports an interrupted wait. */
        do {
            rtn = sem_wait(count_sem);
        } while (rtn == -1 && errno == EINTR);

        if (rtn < 0) {
            perror("sem_wait()");
            sem_close(count_sem);
            return 1;
        }
        printf ("sema signalled\n");
    }
}
compile with gcc semproducer.c -o semproducer -lrt and gcc semconsumer.c -o semconsumer -lrt
I encountered this exact error.
It was because I hadn't linked in the Real-Time library that provides POSIX real-time functionality. The commenter above briefly mentioned the "-lrt" parameter in his solution, but did not emphasize it as the reason that you were getting your exception. You can verify this with the following code:
/*
 * Minimal check: creates (or opens) "/test_sem" with an initial value of 0
 * and waits on it. With a count of zero the wait blocks until some other
 * process posts the semaphore.
 *
 * Returns 1 if sem_open() or sem_wait() fails, 0 if the wait completes.
 */
int main(void)
{
    sem_t *my_sem = sem_open("/test_sem", O_CREAT, S_IRUSR | S_IWUSR, 0);
    /* Never hand SEM_FAILED to sem_wait(): it is not a valid semaphore. */
    if (my_sem == SEM_FAILED)
        return 1;
    if (sem_wait(my_sem) == -1)
        return 1;
    return 0;
}
Assuming this is in the file "test.c" and I run the following:
gcc ./test.c -o test.out
./test.out
I get the Bus Error Output:
Bus error
But with the following command:
gcc ./test.c -o test.out -lrt
./test.out
The program does not crash, and instead waits on the semaphore forever, which is expected with an initial value of zero.
You should NOT pass -lrt, however. The linux documentation for sem_wait(3) indicates that the correct command is "-pthread". Substituting "-lrt" with "-pthread" in my testing indicates that it has the same effect, so the list of flags passed by -pthread on Linux likely includes "-lrt".
As such, the correct compiler invocation would be
gcc ./test.c -o test.out -pthread