Here's the situation. I'm debugging code that implements a logging function. When a user logs in, a log file is created with a .part suffix. This file is saved locally on the host. I do not know why it is named .part. When the user finishes their session, the log file is renamed so that it ends in .username only. Besides the local log file, this code is also connected to a server, which saves the log file as well. The problem occurs when logging is still running and the host suddenly reboots. The reboot might be caused by a command from root, a forced reboot, or perhaps a hardware fault. This causes the log file to stay as .part, and the copy on the server does too.
So, my question is:
How to make it rename the file before the process is killed or terminated during reboot?
What's the signal that I should handle?
I'm thinking this might involve a race condition, is there a way for me to delay the reboot?
My approach
tried to handle SIGPWR,SIGSTOP,SIGTERM,SIGQUIT
create a bash script to do renaming when the process start.
Here is the main code:
/*
 * Daemon entry point.
 *
 * Loads the configuration, then repeatedly builds a libevent base with the
 * local and HTTP listeners plus the signal events and dispatches it.  When the
 * dispatch returns while `done` is still 0 the configuration is reloaded and
 * everything is rebuilt; once `done` is set the loop ends and the socket path
 * and pid file are removed.
 *
 * Returns 0 on normal shutdown, 1 if the event base or a listener cannot be
 * created.
 */
int main(int argc, char **argv)
{
    struct event_config *evconfig;
    struct event *signal_event_int;
    struct event *signal_event_quit;
    struct event *signal_event_term;
    struct event *signal_event_hup;
    struct event *signal_event_chld;
    struct event *signal_event_pwr;

    (void)argc;

    syspath_init_from_argv0(argv[0]);
    load_config(); /* load config first, the command line parameters will override */
    event_set_log_callback(my_event_log_cb);
    evconfig = event_config_new();
    if (event_config_require_features(evconfig, EV_FEATURE_FDS)!=0) {
        log_error("event_config_require_features_failed");
    }

    while (!done) {
        /* ignore HUP first, just in case someone send us a HUP
           when we are reloading config, that will create a condition
           that makes us exit, with HangUp */
        sig_catch(SIGHUP,SIG_IGN);

        base = event_base_new_with_config(evconfig);
        if (!base) {
            log_error("Could not create an event base!");
            return 1;
        }
        local_listener = create_local_listener(base);
        if (!local_listener) {
            log_error("Could not create a local listener!");
            return 1;
        }
        http_listener = create_http_listener();
        if (!http_listener) {
            log_error("Could not create a remote listener!");
            return 1;
        }
        evhttp_set_cb(http_listener, "/mrexec", http_mrexec_cb, NULL);
        if (options.accept_remote) {
            evhttp_set_cb(http_listener, "/rlog", http_rlog_cb, NULL);
        }
        if (pidfile_create(ACTSLOGD_PIDFILE)==-1) {
            log_error("pidfile_create:failed:%d:%s", errno, strerror(errno));
        }
        LIST_INIT(&clientlist);
        if (options.log_remote) {
            start_log_remote();
        }

        signal_event_int = evsignal_new(base, SIGINT, exit_cb, (void *)base);
        event_add(signal_event_int, NULL);
        signal_event_quit = evsignal_new(base, SIGQUIT, exit_cb, (void *)base);
        event_add(signal_event_quit, NULL);
        signal_event_term = evsignal_new(base, SIGTERM, exit_cb, (void *)base);
        event_add(signal_event_term, NULL);
        signal_event_hup = evsignal_new(base, SIGHUP, reload_config_cb, (void *)base);
        event_add(signal_event_hup, NULL);
        signal_event_chld = evsignal_new(base, SIGCHLD, sigchld_cb, (void *)base);
        event_add(signal_event_chld, NULL);
        signal_event_pwr = evsignal_new(base, SIGPWR, power_off_cb, (void *)base);
        event_add(signal_event_pwr, NULL);
        /*
         * No handler is installed for SIGSTOP: like SIGKILL it can be neither
         * caught nor ignored, so an event for it could never fire.  (The
         * previous code created such an event and then added the SIGCHLD
         * event a second time instead of it.)
         */

        actslog_event_start(AGENT_ACTSLOGD);
        actslog_event_start(AGENT_ESCALATED);
        event_base_dispatch(base);
        printf("finished dispatch\n");

        evconnlistener_free(local_listener);
        evhttp_free(http_listener);
        http_listener = NULL;
        event_free(signal_event_int);
        event_free(signal_event_quit);
        event_free(signal_event_term);
        event_free(signal_event_hup);
        event_free(signal_event_chld); /* was leaked on every iteration */
        event_free(signal_event_pwr);
        if (options.log_remote) {
            end_log_remote();
        }

        /* Release the client bufferevents while the base they are attached
           to still exists; freeing them after event_base_free() would touch
           a destroyed base. */
        while (clientlist.lh_first != NULL) {
            struct bufferevent *bev = clientlist.lh_first->bev;
            bufferevent_free(bev);
            LIST_REMOVE(clientlist.lh_first, clients);
        }
        event_base_free(base);

        if (!done) {
            load_config();
        }
    }

    event_config_free(evconfig);
    if (rlog) {
        rlog_close(rlog);
    }
    unlink(PATH_ACTSLOG);
    pidfile_cleanup(ACTSLOGD_PIDFILE);
    return 0;
}
This is the signal handler
/*
 * Signal callback used for the termination signals: emits the stop events for
 * both agents, raises the global `done` flag and asks the event loop to exit
 * after a short grace period.
 *
 * user_data is the struct event_base the signal event was created on.
 */
static void exit_cb(evutil_socket_t sig, short events, void *user_data)
{
    /* need to give some delay for us to send out the stop message to Logger */
    struct timeval grace = { 2, 0 };
    struct event_base *loop = user_data;

    actslog_event_stop(AGENT_ACTSLOGD);
    actslog_event_stop(AGENT_ESCALATED);

    /* Per the original author: once this is 1, another function connects to
       the server to report that logging has stopped. */
    done = 1;

    event_base_loopexit(loop, &grace);
}
/*
 * Signal callback intended for power-off handling: raises `done`, schedules
 * the event loop to exit after five seconds, renames the partial session log
 * to its final name and then closes the remote log, removes the socket path
 * and the pid file.
 *
 * user_data is the struct event_base the signal event was created on.
 */
static void power_off_cb(evutil_socket_t sig, short events, void *user_data)
{
    struct event_base *base = user_data;
    struct timeval delay = { 5, 0 };
    char logfile_partial[MAXPATHLEN];
    char logfile_complete[MAXPATHLEN];
    /*
     * FIXME: nothing in this function ever stores the session id in `id`.
     * It used to be read uninitialised (undefined behaviour, and in practice
     * a garbage path that can never match the real ".part" file).  It is now
     * at least a well-defined empty string; the actual id of the active
     * session still has to be supplied here for the rename to find the file.
     */
    char id[1024] = "";

    (void)sig;
    (void)events;

    done =1;
    event_base_loopexit(base,&delay);

    snprintf(logfile_partial, //the logfile_partial will be the one with .part file
             sizeof(logfile_partial),
             "%s/SHELL.%s.part", logpath2, id);
    snprintf(logfile_complete, //the logfile_complete will be the complete without .part
             sizeof(logfile_complete),
             "%s/SHELL.%s", logpath2, id);

    if (rename(logfile_partial, logfile_complete)!=0) {
        if (errno==ENOENT) {
            int tmp;
            /* No partial file: leave an empty placeholder under the final name. */
            log_error("mastershell [%s] log is incomplete", logfile_complete);
            tmp = creat(logfile_complete, LOG_FILE_MODE);
            if (tmp==-1) {
                log_error("creat:%s:failed:%d:%s!!\n", logfile_complete, errno, strerror(errno));
            } else {
                close(tmp);
            }
        } else {
            log_error("rename:%s:%s:failed:%d:%s!!\n", logfile_partial, logfile_complete, errno, strerror(errno));
        }
    }

    /* NOTE(review): `rlog` is closed here but not cleared, so any later
       shutdown path that also tests and closes `rlog` would close it a
       second time -- confirm and reset it where it is owned. */
    if (rlog) {
        rlog_close(rlog);
    }
    unlink(PATH_ACTSLOG);
    pidfile_cleanup(ACTSLOGD_PIDFILE);
}
I have tested handling all of these signals in the exit_cb function, and also all of them in the power_off_cb function. Neither approach works. I have tested on CentOS and Ubuntu. The logging process is an Upstart job. Any comments or suggestions are really appreciated.
Here's the situation. I'm debugging a code to do a logging function.
When the user log in, the log file will be create with .part format.
This file is save locally inside the host. I do not know why it's name
as .part. When the user finish their session, the log file will be
rename as .username only. Beside the local log file, this code is also
connected to a server where this server will also save the logging
file. The problem is when the logging is still running, but the host
suddenly reboot. The reboot might be caused by command from root
If it is caused by a command from root, you can handle it by creating a script in /etc/init.d/.
, or a
force reboot, or maybe a hardware fault. This causes the logging file
to stay as .part and the server also follows.
Neither you nor the OS can predict the future. If a reboot is caused by a power or hardware failure, there is no way to anticipate it.
Related
I am experimenting with libmosquitto-dev on Raspbian and having some issues.
My code works absolutely fine so far. I can connect to a broker and once the topic gets an update my programm prints the message as it should.
It's just the point when the broker dies after connection and gets restarted.
My code notices that the connection dropped and tries to reconnect. Once the broker is back online, my code reconnects. But from then on it does not print any updates on the topic.
Why not? I thought the reconnect would pick the subscription up again, but it does not.
Here's my code:
[...]
/* The message loop in main() keeps running while this is non-zero; nothing in the visible code clears it. */
static int run = 1;
/*
 * Called by libmosquitto each time a connection attempt completes, including
 * after a reconnect.
 *
 * mosq:   client instance the callback was registered on.
 * obj:    user data (unused).
 * result: connection result code; 0 means the broker accepted the connection.
 *
 * The subscription is issued here rather than once at start-up: the client is
 * created with a clean session, so the broker forgets the subscription when
 * the connection drops and it has to be requested again on every successful
 * connect.
 */
void connect_callback(struct mosquitto *mosq, void *obj, int result)
{
    int rc;

    (void)obj;
    printf("connect callback, rc=%d\n", result);
    if (result) {
        return; /* connection refused: nothing to subscribe on */
    }

    rc = mosquitto_subscribe(mosq, NULL, "Heizung", 0);
    if (rc) {
        printf("subscribe failed, rc=%d\n", rc);
    }
}
/*
 * Called by libmosquitto for every message received: prints payload and
 * topic, then prints an extra line when the topic matches the "Heizung"
 * subscription.
 */
void message_callback(struct mosquitto *mosq, void *obj, const struct mosquitto_message *message)
{
    bool is_heizung = false;

    printf("got message '%.*s' for topic '%s'\n",
           message->payloadlen,
           (char*) message->payload,
           message->topic);

    mosquitto_topic_matches_sub("Heizung", message->topic, &is_heizung);
    if (!is_heizung) {
        return;
    }
    printf("got message for HEIZUNG topic\n");
}
/*
 * Connects to the broker, subscribes to "Heizung" and processes network
 * traffic until `run` becomes 0.  When the loop call reports an error the
 * client waits ten seconds, reconnects and subscribes again.
 *
 * Returns the last result code of the loop (0 if the client could not be
 * created).
 */
int main(int argc, char *argv[])
{
    char clientid[24];
    struct mosquitto *mosq;
    int rc = 0;

    (void)argc;
    (void)argv;

    mosquitto_lib_init();
    snprintf(clientid, sizeof clientid, "mylog_%d", getpid());
    mosq = mosquitto_new(clientid, true, 0);
    if(mosq){
        mosquitto_connect_callback_set(mosq, connect_callback);
        mosquitto_message_callback_set(mosq, message_callback);
        rc = mosquitto_connect(mosq, mqtt_host, mqtt_port, 60);
        mosquitto_subscribe(mosq, NULL, "Heizung", 0);
        // rc = mosquitto_loop_forever(mosq,20,5); // Tried with this function but same issue.
        while(run){
            rc = mosquitto_loop(mosq, -1, 1);
            if(run && rc){
                printf("connection error!\n");
                sleep(10);
                /* The session is clean (second argument of mosquitto_new),
                   so the broker has dropped the subscription: ask for it
                   again once the reconnect has been started successfully. */
                if(!mosquitto_reconnect(mosq)){
                    mosquitto_subscribe(mosq, NULL, "Heizung", 0);
                }
            }
        }
        mosquitto_destroy(mosq);
    }
    mosquitto_lib_cleanup();
    return rc;
}
What I see as output is the following:
connect callback, rc=0
got message 'ON1' for topic 'Heizung'
got message for Heizung topic
got message 'ON2' for topic 'Heizung'
got message for Heizung topic
got message 'ON3' for topic 'Heizung'
got message for Heizung topic
connection error!
connect callback, rc=0
You can see the connection error (where "systemctl stop mosquitto" took place), and you can see that the reconnection appears to be successful once the broker is back again. But the program does not print any of the new messages which are sent by the publisher after the broker is back. Running the mosquitto_sub command in parallel shows all messages!
Any idea what is wrong here?
Thanks a lot!
/KNEBB
Move the call to mosquitto_subscribe to the connect_callback that way it will get called on a reconnect.
Since you are connecting with the CleanSession flag set to true each time you reconnect there will be no persistent session so the broker will not know to keep the subscription.
I'm doing a multiplatform shared library in C, which sends UDP messages using libuv, however I don't know much about libuv and I don't know if my implementation is good, or if there is another solution besides libuv.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <uv.h>
#define IP "0.0.0.0"
#define PORT 8090
#define STR_BUFFER 256
/* Completion callback for uv_udp_send(): reports a failed send on stderr and is silent on success. */
void on_send(uv_udp_send_t *req, int status) {
    if (status == 0) {
        return;
    }
    fprintf(stderr, "Send error %s\n", uv_strerror(status));
}
int send_udp(char *msg){
uv_loop_t *loop = malloc(sizeof(uv_loop_t));
uv_loop_init(loop);
uv_udp_t send_socket;
uv_udp_init(loop, &send_socket);
struct sockaddr_in send_addr;
uv_ip4_addr(IP, PORT, &send_addr);
uv_udp_bind(&send_socket, (const struct sockaddr*)&send_addr, 0);
char buff[STR_BUFFER];
memset(buff,0,STR_BUFFER);
strcpy(buff,msg);
uv_buf_t buffer = uv_buf_init(buff,STR_BUFFER);
uv_udp_send_t send_req;
uv_udp_send(&send_req, &send_socket, &buffer, 1, (const struct sockaddr*)&send_addr, on_send);
uv_run(loop, UV_RUN_ONCE);
uv_loop_close(loop);
free(loop);
return 0;
}
/* Demo driver: sends one test datagram; the result of send_udp() is not inspected. */
int main() {
    char *demo_message = "test 123\n";

    send_udp(demo_message);
    return 0;
}
Your implementation has multiple issues to date:
I'm not sure a single loop iteration is enough to send an UDP message on every platform. This is something you can check easily with the value returned by uv_run, see the documentation for uv_run when using the UV_RUN_ONCE mode:
UV_RUN_ONCE: Poll for i/o once. Note that this function blocks if there are no pending callbacks. Returns zero when done (no active handles or requests left), or non-zero if more callbacks are expected (meaning you should run the event loop again sometime in the future).
If you would keep your code as-is, I would suggest to do at least this:
int done;
do {
done = uv_run(loop, UV_RUN_ONCE);
} while (done != 0);
But keep on reading, you can do even better ! :)
It's quite costly in terms of performance, uv_loops are supposed to be long lasting, not to be created for each message sent.
Incomplete error handling: uv_udp_bind, uv_udp_send, ... they can fail !
How to improve
I would suggest you to change your code for one of the two following solutions:
Your library is used in a libuv context, i.e. you don't try to hide the libuv implementation detail but require everyone who wishes to use your library to use libuv explicitly.
You could then change your function signature to something like int send_udp(uv_loop_t *loop, char *msg) and let the library users manage the event loop and run it.
Your library uses libuv as an implementation detail: you don't want to bother your library users with libuv, therefore it's your responsibility to provide robust and performant code. This is how I would do it:
mylib_init: starts a thread and run an uv_loop on it
send_udp: push the message on a queue (beware of thread-safety), notify your loop it has a message to send (you can use uv_async for this), then you can send the message with approximately the same code you are already using.
mylib_shutdown: stop the loop and the thread (again, you can use an uv_async to call uv_stop from the right thread)
It would look like this (I don't have a compiler to test, but you'll have most of the work done):
/* File-scope state shared between the caller-facing functions and the network thread.
   queue_t is not defined in this snippet; the author leaves the queue implementation to the reader. */
static uv_thread_t thread; // our network thread
static uv_loop_t loop; // the loop running on the thread
static uv_async_t notify_send; // to notify the thread it has messages to send
static uv_async_t notify_shutdown; // to notify the thread it must shutdown
static queue_t buffer_queue; // a queue of messages to send
static uv_mutex_t buffer_queue_mutex; // to sync access to the queue from the various threads
/* Forward declarations: the thread entry point and the two uv_async callbacks. */
static void thread_entry(void *arg);
static void on_send_messages(uv_async_t *handle);
static void on_shutdown(uv_async_t *handle);
int mylib_init() {
// will call thread_entry on a new thread, our network thread
return uv_thread_create(&thread, thread_entry, NULL);
}
int send_udp(char *msg) {
uv_mutex_lock(&buffer_queue_mutex);
queue_enqueue(&buffer_queue, strdup(msg)); // don't forget to free() after sending the message
uv_async_send(¬ify_send);
uv_mutex_unlock(&buffer_queue_mutex);
}
int mylib_shutdown() {
// will call on_shutdown on the loop thread
uv_async_send(¬ify_shutdown);
// wait for the thread to stop
return uv_thread_join(&thread);
}
static void thread_entry(void *arg) {
uv_loop_init(&loop);
uv_mutex_init_recursive(&buffer_queue_mutex);
uv_async_init(&loop, ¬ify_send, on_send_messages);
uv_async_init(&loop, ¬ify_shutdown, on_shutdown);
uv_run(&loop, UV_RUN_DEFAULT); // this code will not return until uv_stop is called
uv_mutex_destroy(&buffer_queue_mutex);
uv_loop_close(&loop);
}
static void on_send_messages(uv_async_t *handle) {
uv_mutex_lock(&buffer_queue_mutex);
char *msg = NULL;
// for each member of the queue ...
while (queue_dequeue(&buffer_queue, &msg) == 0) {
// create a uv_udp_t, send the message
}
uv_mutex_unlock(&buffer_queue_mutex);
}
static void on_shutdown(uv_async_t *handle) {
uv_stop(&loop);
}
It's up to you to develop or find a queue implementation ;)
Usage
/* Usage example: start the library, queue one message, shut down again. Return codes are not inspected. */
int main() {
    mylib_init();
    send_udp("my super message");
    mylib_shutdown();
    return 0;
}
FINAL EDIT: Solution to problem was stated by the answer I have selected. The representative example code is shown in the diff here
EDIT: Full compile-able code at the bottom of the post.
I have this rudimentary multithreaded server that simply accepts a connection and is supposed to pass the file descriptor off to a thread to allow this thread to handle it directly until the client disconnects.
For some reason, even with the following code flow inside of the server, some clients "Fall through the cracks" and get stuck in limbo. (They never get handled by the server so they just hang after accepting the connection)
The following block is my server main running loop:
/*
 * Accept loop: takes one connection at a time, reserves a handler slot via
 * the semaphore, sends the one-byte handshake and hands the descriptor to a
 * handler thread through g_serv.new_connection_fd.
 */
while(g_serv.b_running)
{
    //printf("Awaiting connection.\n");
    client_fd = accept(g_serv.serv_listener_fd,
                       (struct sockaddr*)&cli_addr,
                       &clilen);
    if (0 > client_fd)
    {
        fprintf(stderr,
                "Error accepting connection. [%s]\n",
                strerror(errno));
        continue;
    }
    err = sem_trywait(&(g_serv.client_count_sem));
    if (0 > err)
    {
        fprintf(stderr,
                "Max connections reached. [%s]\n",
                strerror(errno));
        notify_client_max_connections(client_fd);
        close(client_fd);
        client_fd = 0;
        continue;
    }
    printf("A client has connected.\n");
    char byte[2] = "0";
    err = send(client_fd, byte, 1, 0);
    // Set up client FD in global position and wake up a thread to grab it
    //
    pthread_mutex_lock(&(g_serv.new_connection_fd_lock));
    // The hand-off slot holds a single descriptor.  If the previous one has
    // not been picked up yet, storing the new one would overwrite it: that
    // client would never be served and its semaphore count never returned.
    // Wait, with the lock released, until a handler has emptied the slot.
    // (A second condition variable signalled by the handlers would avoid the
    // polling, but needs a new field in serv_t.)
    while (0 != g_serv.new_connection_fd && g_serv.b_running)
    {
        pthread_cond_signal(&(g_serv.new_connection));
        pthread_mutex_unlock(&(g_serv.new_connection_fd_lock));
        sched_yield();
        pthread_mutex_lock(&(g_serv.new_connection_fd_lock));
    }
    if (0 != g_serv.new_connection_fd)
    {
        // Server is stopping and the slot is still occupied: give this
        // connection and its semaphore count back instead of overwriting.
        pthread_mutex_unlock(&(g_serv.new_connection_fd_lock));
        close(client_fd);
        sem_post(&(g_serv.client_count_sem));
        continue;
    }
    g_serv.new_connection_fd = client_fd;
    if (0 != g_serv.new_connection_fd)
    {
        pthread_cond_signal(&(g_serv.new_connection));
    }
    pthread_mutex_unlock(&(g_serv.new_connection_fd_lock));
}
This block is the thread handling function:
/*
 * Worker thread body.
 *
 * args is the shared serv_t.  While the server is running the thread waits on
 * the condition variable for a non-zero new_connection_fd, claims it (resetting
 * the slot to 0), serves the client until handle_client() reports it is gone,
 * then closes the descriptor and posts the client-count semaphore.
 *
 * Always returns NULL.
 */
void* thread_handler(void* args)
{
    serv_t* p_serv = (serv_t*)args;

    while (p_serv->b_running)
    {
        int claimed_fd;

        pthread_mutex_lock(&(p_serv->new_connection_fd_lock));
        while (p_serv->b_running && 0 == p_serv->new_connection_fd)
        {
            pthread_cond_wait(&(p_serv->new_connection),
                              &(p_serv->new_connection_fd_lock));
        }
        claimed_fd = p_serv->new_connection_fd;
        p_serv->new_connection_fd = 0;
        pthread_mutex_unlock(&(p_serv->new_connection_fd_lock));

        // Woken without a descriptor (e.g. a broadcast for exiting the
        // server): go back and re-check the running flag.
        if (0 != claimed_fd)
        {
            while (handle_client(claimed_fd))
            {
                /* keep serving until the client disconnects */
            }
            close(claimed_fd);
            sem_post(&(p_serv->client_count_sem));
        }
    }
    return NULL;
} /* thread_handler */
Just for data reference here is my serv_t struct:
/* Shared server state, used by the accept loop and by every handler thread. */
typedef struct serv_t {
bool b_running;                          /* accept loop and handler threads keep looping while this is true */
int max_connections;                     /* presumably the connection limit the semaphore starts at -- TODO confirm */
int serv_listener_fd;                    /* listening socket passed to accept() */
sem_t client_count_sem;                  /* taken (sem_trywait) per accepted client, posted when a handler finishes one */
pthread_mutex_t new_connection_fd_lock;  /* guards new_connection_fd */
pthread_cond_t new_connection;           /* signalled after a descriptor is stored in new_connection_fd */
int new_connection_fd;                   /* single hand-off slot; 0 means "no pending connection" */
pthread_t* p_thread_ids;                 /* presumably the handler thread ids -- not used in the code shown */
} serv_t;
Basically, if I run netcat or a client program I have against it with multiple instances via a bash command to "background" the application, some of these instances get stuck. I have it redirecting the output to a file, but what's happening is that particular instance of the client/netcat is just getting stuck after the accept call.
More specifically, if I run my program with two threads, one instance of a program gets stuck and no subsequent copies get stuck, even running 6500 instances against the server.
If I run it with ten threads, as many as 8 or 9 instances get stuck, but the threads still function properly within the server.
EDIT:
Client code I refer to, starting from the server letting the client know that the server is ready to receive data:
/* Wait for the server's one-byte "ready" handshake ('0') before sending anything. */
char buff[2] = { 0 };
err = recv(client_socket_fd, buff, 1, 0);
/* Fail when EITHER the read did not deliver exactly one byte OR that byte is
   not '0'.  (With "&&" a failed recv() that left buff[0] == 0 was the only
   case detected; a wrong byte or a closed connection alone slipped through.) */
if (1 != err || '0' != buff[0])
{
    fprintf(stderr,
            "Server handshake error. [%s]\n",
            strerror(errno));
    close(client_socket_fd);
    return EXIT_FAILURE;
}
if (NULL != p_infix_string)
{
    if (MAX_BUFFER_SIZE < strlen(p_infix_string))
    {
        fprintf(stderr,
                "Infix string is over 100 characters long.\n");
        close(client_socket_fd); /* was leaked on this path */
        return EXIT_FAILURE;
    }
    errno = 0;
    char* p_postfix = infix_to_postfix(p_infix_string);
    if (EINVAL == errno || NULL == p_postfix)
    {
        /* Do not go on to send a missing or invalid conversion result. */
        fprintf(stderr, "Error converting provided string.\n");
        free(p_postfix);
        close(client_socket_fd);
        return EXIT_FAILURE;
    }
    bool success = send_postfix(p_postfix, client_socket_fd);
    free(p_postfix);
    if (false == success)
    {
        fprintf(stderr,
                "An error occured while sending the equation to the server.\n");
        close(client_socket_fd);
        return EXIT_FAILURE;
    }
}
The client is getting stuck at the receive call here:
/*
 * Sends the postfix expression to the server and prints the server's reply.
 *
 * p_postfix:        NUL-terminated expression; at most MAX_BUFFER_SIZE bytes
 *                   of it are sent.
 * client_socket_fd: connected socket.
 *
 * Returns true when the whole expression was sent and a reply was received,
 * false on a NULL expression, a failed/short send, a closed connection or a
 * receive error.  The socket is left open in every case.
 */
bool send_postfix(char* p_postfix, int client_socket_fd)
{
    if (NULL == p_postfix)
    {
        fprintf(stderr, "No postfix string provided to send to server.\n");
        return false;
    }
    printf("Sending postfix to server\n");
    size_t to_send = strnlen(p_postfix, MAX_BUFFER_SIZE);
    ssize_t sent = send(client_socket_fd,
                        p_postfix,
                        to_send,
                        0);
    /* Test for -1 before comparing lengths: comparing a negative result with
       a size_t converts it to a huge unsigned value and hides the failure. */
    if (0 > sent || (size_t)sent < to_send)
    {
        fprintf(stderr,
                "Unable to send message to server. [%s]\n",
                strerror(errno));
        return false;
    }
    char response[MAX_BUFFER_SIZE] = { 0 };
    printf("Waiting for receive\n");
    /* Leave room for the terminating NUL so the reply can be printed with %s. */
    ssize_t received = recv(client_socket_fd, response, sizeof(response) - 1, 0);
    if (0 == received)
    {
        fprintf(stderr,
                "Connection to server lost. [%s]\n",
                strerror(errno));
        return false;
    }
    else if (0 > received)
    {
        fprintf(stderr,
                "Unable to receive message on socket. [%s]\n",
                strerror(errno));
        return false;
    }
    printf("Server responded with: \n%s\n", response);
    return true;
} /* send_postfix */
EDIT: https://github.com/TheStaplergun/Problem-Code
I uploaded the code to this repo and removed the need for the extraneous files I use and filled them with placeholders.
You can recreate this problem using the server with the command ./postfix_server -p 8888 -n 2 and the client issue in another terminal with for i in {1..4}; do ./postfix_client -i 127.0.0.1 -p 8888 -e "3 + $i" &> $i.txt & done
The output of each client will be forcefully flushed because of the setbuf at the top of client. Run it, see if any programs hang, if not run that command again. Just type PS and see if one of them is hanging, and look at the resulting text file. You will see it is stuck at the receive call.
If you sigint the server (CTRL + C), the client that was stuck will close with a Connection reset by peer response from the server, so the server still does have that file descriptor locked up somewhere.
I believe a race condition is happening somehow, because it only happens randomly.
A curious thing is it only happens ONCE PER SERVER INSTANCE.
If I kill that hung instance and proceed to do it again 10000 times it never does another hang until the server is reset.
For some reason, even with the following code flow inside of the
server, some clients "Fall through the cracks" and get stuck in limbo.
(They never get handled by the server so they just hang after
accepting the connection)
There may be other issues, but the first one I see is that main loop does not ensure that a new connection is actually picked up by any handler thread before it tries to hand off the next connection. Even if there are handler threads already blocked on the CV when a new connection is accepted, it is possible for the main server thread to signal the CV, loop back around, accept another connection, reacquire the mutex, and overwrite the new-connection FD before any handler thread picks up the previous one. The chances of that increase if you have more threads than cores.
Note that this will also interfere with your semaphore-based counting of available handlers -- you decrement the semaphore for every connection accepted, but you increment it again only for those that are successfully handled.
There are various ways that you could make the main server thread wait for the new connection to be picked up by a handler. One group would involve the server waiting on a CV itself, and relying on a handler to signal it after picking up the connection. Another, perhaps simpler, approach would involve using a semaphore to similar effect. But I would suggest instead not waiting, but instead creating a thread-safe queue for available connections, so that the server doesn't have to wait. That would even allow for queueing more connections than presently available handlers, if that would be useful to you.
I am writing a Linux daemon that writes a log. I'd like the log to be rotated by logrotate. The program is written in C.
Normally, my program would open the log file when it starts, then write entries as needed and then, finally, close the log file on exit.
What do I need to do differently in order to support log rotation using logrotate? As far as I have understood, my program should be able to reopen the log file each time logrotate has finished its work. The sources that I googled didn't, however, specify what reopening the log file exactly means. Do I need to do something about the old file, and can I just create another file with the same name? I'd prefer quite specific instructions, like some simple sample code.
I also understood that there should be a way to tell my program when it is time to do the reopening. My program already has a D-Bus interface and I thought of using that for those notifications.
Note: I don't need instructions on how to configure logrotate. This question is only about how to make my own software compatible with it.
There are several common ways:
you use logrotate and your program should be able to catch a signal (usually SIGHUP) as a request to close and reopen its log file. Then logrotate sends the signal in a postrotate script
you use logrotate and your program is not aware of it, but can be restarted. Then logrotate restarts your program in a postrotate script. Cons: if the start of the program is expensive, this may be suboptimal
you use logrotate and your program is not aware of it, but you pass the copytruncate option to logrotate. Then logrotate copies the file and then truncates it. Cons: because of a race condition you can lose messages. From the logrotate manpage:
... Note that there is a very small time slice between copying the file and truncating it, so some logging data might be lost...
you use rotatelogs, a utility that ships with the Apache httpd server. Instead of writing directly to a file, your program pipes its logs to rotatelogs. Then rotatelogs manages the different log files. Cons: your program should be able to log to a pipe, or you will need to set up a named FIFO.
But beware, for critical logs, it may be interesting to close the files after each message, because it ensures that everything has reached the disk in case of an application crash.
Although man logrotate examples use the HUP signal, I recommend using USR1 or USR2, as it is common to use HUP for "reload configuration". So, in logrotate configuration file, you'd have for example
/var/log/yourapp/log {
rotate 7
weekly
postrotate
/usr/bin/killall -USR1 yourapp
endscript
}
The tricky bit is to handle the case where the signal arrives in the middle of logging. The fact that none of the locking primitives (other than sem_post(), which does not help here) are async-signal safe makes it an interesting issue.
The easiest way to do it is to use a dedicated thread, waiting in sigwaitinfo(), with the signal blocked in all threads. At exit time, the process sends the signal itself, and joins the dedicated thread. For example,
#define ROTATE_SIGNAL SIGUSR1
static pthread_t log_thread;
static pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;
static char *log_path = NULL;
static FILE *volatile log_file = NULL;

/*
 * printf-style logging to the current log file.
 *
 * format: printf format string followed by its arguments.
 *
 * Returns the number of characters written (the vfprintf() result), 0 for an
 * empty format, or -1 when format is NULL or no log file is open.
 *
 * The log lock is held only around the write and is released on every path;
 * previously the "no log file" return left the mutex locked (and the va_list
 * open), so the next call blocked forever.
 */
int log(const char *format, ...)
{
    va_list args;
    int retval = -1;

    if (!format)
        return -1;
    if (!*format)
        return 0;

    pthread_mutex_lock(&log_lock);
    if (log_file) {
        va_start(args, format);
        retval = vfprintf(log_file, format, args);
        va_end(args);
    }
    pthread_mutex_unlock(&log_lock);

    return retval;
}
/*
 * Body of the dedicated rotation thread.
 *
 * Waits synchronously for ROTATE_SIGNAL.  A signal sent by this very process
 * is the request to exit; any other one makes the thread close the log file
 * and reopen it in append mode from log_path.  On exit the file is closed.
 * All accesses to log_file happen under log_lock.
 *
 * The argument is unused; always returns NULL.
 */
void *log_sighandler(void *unused)
{
    sigset_t rotate_set;
    siginfo_t received;

    (void)unused;

    sigemptyset(&rotate_set);
    sigaddset(&rotate_set, ROTATE_SIGNAL);

    for (;;) {
        /* Anything other than the rotate signal (including an error return)
           is ignored and the wait is restarted. */
        if (sigwaitinfo(&rotate_set, &received) != ROTATE_SIGNAL)
            continue;

        /* Sent by this process itself, for exiting? */
        if (received.si_pid == getpid())
            break;

        pthread_mutex_lock(&log_lock);
        if (log_file) {
            fflush(log_file);
            fclose(log_file);
            log_file = NULL;
        }
        if (log_path)
            log_file = fopen(log_path, "a");
        pthread_mutex_unlock(&log_lock);
    }

    /* Close time. */
    pthread_mutex_lock(&log_lock);
    if (log_file) {
        fflush(log_file);
        fclose(log_file);
        log_file = NULL;
    }
    pthread_mutex_unlock(&log_lock);

    return NULL;
}
/* Initialize logging to the specified path.
   Blocks ROTATE_SIGNAL in the calling thread, opens the log file in append
   mode, remembers a copy of the path for later reopening, and starts the
   thread that handles the rotate signal.
   Returns 0 if successful, errno otherwise (EINVAL for a NULL path, ENOMEM
   if the path cannot be copied). */
int log_init(const char *path)
{
    sigset_t sigs;
    pthread_attr_t attrs;
    int retval;

    if (!path)
        return errno = EINVAL;

    /* Block the rotate signal in all threads. */
    sigemptyset(&sigs);
    sigaddset(&sigs, ROTATE_SIGNAL);
    pthread_sigmask(SIG_BLOCK, &sigs, NULL);

    /* Open the log file. Since this is in the main thread,
       before the rotate signal thread, no need to use log_lock. */
    if (log_file) {
        /* You're using this wrong. */
        fflush(log_file);
        fclose(log_file);
    }
    log_file = fopen(path, "a");
    if (!log_file)
        return errno;

    /* Keep a private copy of the path; without it the file could never be
       reopened after a rotation. */
    free(log_path);
    log_path = strdup(path);
    if (!log_path) {
        fclose(log_file);
        log_file = NULL;
        return errno = ENOMEM;
    }

    /* Create a thread to handle the rotate signal, with a tiny stack. */
    pthread_attr_init(&attrs);
    /* The attribute object is the first argument (it was missing, which does
       not compile).  If the size is rejected the default stack size stays. */
    pthread_attr_setstacksize(&attrs, 65536);
    retval = pthread_create(&log_thread, &attrs, log_sighandler, NULL);
    pthread_attr_destroy(&attrs);
    if (retval)
        return errno = retval;

    return 0;
}
/*
 * Shuts logging down: sends the rotate signal to the rotation thread from
 * within this process, waits for that thread to finish, then releases the
 * saved log path.
 */
void log_done(void)
{
    (void)pthread_kill(log_thread, ROTATE_SIGNAL);
    (void)pthread_join(log_thread, NULL);

    free(log_path);
    log_path = NULL;
}
The idea is that in main(), before logging or creating any other threads, you call log_init(path-to-log-file), noting that a copy of the log file path is saved. It sets up the signal mask (inherited by any threads you might create), and creates the helper thread. Before exiting, you call log_done(). To log something to the log file, use log() like you would use printf().
I'd personally also add a timestamp before the vfprintf() line, automatically:
/* Timestamp prefix, meant to be placed just before the vfprintf() call in log():
   writes "YYYY-MM-DD HH:MM:SS.mmm: " in local time, and writes nothing if the
   clock cannot be read or converted. */
struct timespec ts;
struct tm tm;
if (clock_gettime(CLOCK_REALTIME, &ts) == 0 &&
localtime_r(&(ts.tv_sec), &tm) == &tm)
fprintf(log_file, "%04d-%02d-%02d %02d:%02d:%02d.%03ld: ",
tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
tm.tm_hour, tm.tm_min, tm.tm_sec,
ts.tv_nsec / 1000000L); /* nanoseconds -> milliseconds */
This YYYY-MM-DD HH:MM:SS.sss format has the nice benefit that it is close to a worldwide standard (ISO 8601) and sorts in the correct order.
Normally, my program would open the log file when it starts, then
write entries as needed and then, finally, close the log file on exit.
What do I need to do differently in order to support log rotation
using logrotate?
No, your program should work as if it doesn't know anything about logrotate.
Do I need to do something about the old file and can I just create another file with the same name?
No. There should be only one log file to be opened and be written. Logrotate will check that file and if it becomes too large, it does copy/save the old part, and truncate the current log file. Therefore, your program should work completely transparent - it doesn't need to know anything about logrotate.
I wrote a chardevice that passes some messages received from the network to an user space application. The user space application has to both read the chardevice and send/receive messages via TCP sockets to other user-space applications. Both read and receiving should be blocking.
Since Libevent is able to handle multiple events at the same time, I thought registering an event for the file created by the chardevice and an event for a socket would just work, but I was wrong.
But a chardevice creates a "character special file", and libevent seems to not be able to block. If I implement a blocking mechanism inside the chardevice, i.e. mutex or semaphore, then the socket event blocks too, and the application cannot receive messages.
The user space application has to accept outside connections at any time.
Do you know how to make it work? Maybe also using another library, I just want a blocking behaviour for both socket and file reader.
Thank you in advance.
Update: Thanks to #Ahmed Masud for the help. This is what I've done
Kernel module chardevice:
Implement a poll function that waits until new data is available
/* File operations of the character device; "..." stands for entries the author left out. */
struct file_operations fops = {
...
.read = kdev_read, /* hands one queued message to user space */
.poll = kdev_poll, /* reports readability while messages are queued */
};
I have a global variable to handle if the user space has to stop, and a wait queue:
/* Non-zero while the module is operational; cleared on unload so that read() returns 0.
   (Declared with an explicit type: implicit int is not valid C since C99.) */
static int working = 1;
/* Readers sleep on this queue (via poll) until data is available. */
static wait_queue_head_t access_wait;
This is the read function, I return -1 if there is an error in copy_to_user, > 0 if everything went well, and 0 if the module has to stop. used_buff is atomic since it handles the size of a buffer shared read by user application and written by kernel module.
/*
 * read() handler: copies the oldest queued message (header plus payload) to
 * user space and releases its slot.
 *
 * Returns the number of bytes copied; 0 when a signal is pending or the
 * module is shutting down; -EAGAIN when nothing is queued; -EINVAL when the
 * caller's buffer is too small for the message; -EFAULT when the copy to
 * user space fails.
 */
ssize_t
kdev_read(struct file* filep, char* buffer, size_t len, loff_t* offset)
{
    size_t llen;

    if (signal_pending(current) || !working) { // user called sigint
        return 0;
    }
    if (atomic_read(&used_buf) <= 0) {
        /* Nothing queued (e.g. a read that was not preceded by poll). */
        return -EAGAIN;
    }

    llen = sizeof(struct user_msg) + msg_buf[first_buf]->size;
    if (len < llen) {
        /* Never write past the end of the caller's buffer. */
        return -EINVAL;
    }

    /* copy_to_user() returns the number of bytes it could NOT copy; that
       count must not be handed back as if it were a read length. */
    if (copy_to_user(buffer, (char*)msg_buf[first_buf], llen) != 0) {
        paxerr("send fewer characters to the user");
        return -EFAULT;
    }

    /* Release the slot only after the message has been copied out, so it is
       never reported as free while it is still being read. */
    atomic_dec(&used_buf);
    first_buf = (first_buf + 1) % BUFFER_SIZE;
    return llen;
}
When there is data to read, I simply increment used_buf and call wake_up_interruptible(&access_wait).
This is the poll function, I just wait until the used_buff is > 0
/*
 * poll() handler: registers the caller on access_wait and reports the device
 * as readable while at least one message is queued; otherwise reports nothing.
 */
unsigned int
kdev_poll(struct file* file, poll_table* wait)
{
    unsigned int ready_mask = 0;

    poll_wait(file, &access_wait, wait);
    if (atomic_read(&used_buf) > 0) {
        ready_mask = POLLIN | POLLRDNORM;
    }
    return ready_mask;
}
Now, the problem here is that if I unload the module while the user space application is waiting, the latter will go into a blocked state and it won't be possible to stop it. That's why I wake up the application when the module is unloaded
/*
 * Module unload path: marks the module as stopped and wakes any reader that is
 * waiting, so that its next read() returns 0 instead of leaving it blocked.
 * The "..." stands for the unregistration code the author left out.
 */
void
kdevchar_exit(void)
{
working = 0;
atomic_inc(&used_buf); // increase buffer size to application is unlocked
wake_up_interruptible(&access_wait); // wake up application, but this time read will return 0 since working = 0;
... // unregister everything
}
User space application
Libevent by default uses polling, so simply create an event_base and a reader event.
/* User-space setup: open the device non-blocking and register a persistent read event for it. */
/* NOTE(review): the results of event_base_new(), open() and event_new() are not checked here, */
/* and the event still has to be added (event_add) before the loop will deliver it -- not shown. */
base = event_base_new();
filep = open(fname, O_RDWR | O_NONBLOCK, 0);
evread = event_new(base, filep, EV_READ | EV_PERSIST,
on_read_file, base);
where on_read_file simply reads the file, no poll call is made (libevent handles that):
/*
 * Read-event callback for the device file; arg is the event_base.
 * A negative read result is ignored, a result of 0 is treated as "stopped by
 * the kernel module" and breaks out of the event loop, anything else is a
 * message to handle.  read(...) and the handling code are elided by the author.
 */
static void
on_read_file(evutil_socket_t fd, short event, void* arg)
{
struct event_base* base = arg;
int len = read(...);
if (len < 0)
return; /* error or nothing available: wait for the next event */
if (len == 0) {
printf("Stopped by kernel module\n");
event_base_loopbreak(base);
return;
}
... // handle message
}