Jagboy
Posts: 17
Joined: Thu Dec 28, 2017 5:19 pm

Tearing My Hair Out Over Signals...

Thu Aug 09, 2018 4:43 am

I am having trouble getting an application that runs as a systemd daemon to exit, and I can't make sense of what it's even trying to do...

Here is the pertinent code:

Code: Select all

main(...)
{
	/*
	 * Daemon signal loop: block the signals of interest, receive them
	 * through a signalfd, and dispatch on the signal number until SIGTERM
	 * or SIGCONT asks the loop to finish.
	 */

	/* We will handle SIGTERM, SIGHUP, SIGINT, SIGCONT */
	sigset_t mask;
	sigemptyset(&mask);
	sigaddset(&mask, SIGTERM);
	sigaddset(&mask, SIGHUP);
	sigaddset(&mask, SIGINT);
	sigaddset(&mask, SIGCONT);

	/* Block the signals that we handle using signalfd(), so they don't
	 * cause signal handlers or default signal actions to execute. */
	if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
	{
		/* perror() appends ": <reason>" and a newline by itself. */
		perror("Failed sigprocmask in main()");
		return 1;
	}

	/* Create a file descriptor from which we will read the signals. */
	int sfd = signalfd(-1, &mask, 0);
	if (sfd < 0)
	{
		logprintf("Failed creating signalfd file descriptor in main()\n");
		return 1;
	}
	
	...
	
	bool done = false;
	while (!done)
	{
		struct pollfd pfd[NFDS];

		pfd[0].fd = sfd;
		pfd[0].events = POLLIN | POLLERR | POLLHUP;

		/* Nothing else happens in this loop, so block until the signalfd
		 * is readable instead of waking up every millisecond. */
		int ret = poll(pfd, NFDS, -1);
		if (ret <= 0 || !(pfd[0].revents & POLLIN))
		{
			continue;
		}

		/* We have a valid signal, read the info from the fd */
		struct signalfd_siginfo info;
		ssize_t res = read(sfd, &info, sizeof(info));
		if (res != (ssize_t)sizeof(info))
		{
			logprintf("Got Invalid result length from read of signalfd\n");
			close(sfd);
			break;
		}

		unsigned sig = info.ssi_signo;
		unsigned user = info.ssi_uid;

		/* A switch guarantees exactly one branch runs per signal. The
		 * previous if-chain lacked an "else" before the SIGCONT test, so a
		 * SIGTERM was also reported as an unhandled signal. */
		switch (sig)
		{
			case SIGTERM:
				logprintf("Got SIGTERM - user=%d pid=%d ppid=%d Exiting\n", user, getpid(), getppid());
				close(sfd);
				done = true;
				break;

			case SIGCONT:
				logprintf("Got SIGCONT - user=%d pid=%d ppid=%d Exiting\n", user, getpid(), getppid());
				close(sfd);
				done = true;
				break;

			case SIGINT:
				logprintf("Got SIGINT - user=%d pid=%d ppid=%d Exiting\n", user, getpid(), getppid());
				break;

			case SIGSEGV:
				/* SIGSEGV is not in the mask above, so this case is only
				 * reached if it is added there. */
				logprintf("Got SIGSEGV - user=%d pid=%d ppid=%d Exiting\n", user, getpid(), getppid());
				break;

			case SIGHUP:
			{
				/* Build the replacement first so Config never points at a
				 * deleted object. */
				QDConfig *newcfg = new QDConfig(Config->ConfigPath());
				QDConfig *oldcfg = Config;
				Config = newcfg;
				delete oldcfg;
				logprintf("Got SIGHUP - re-loaded: %s\n", Config->ConfigPath());
				break;
			}

			default:
				logprintf("Got unhandled signal %d - user=%d pid=%d ppid=%d Exiting\n", sig, user, getpid(), getppid());
				break;
		}
	}
}
When I try to stop the daemon (qddaemon) using "sudo systemctl stop qddaemon", systemctl simply hangs. I never see the SIGTERM message in my code above, but I DO see:

2018-08-09 04:30:06 sigHandler: Unhandled signal 15, terminating

This message is NOT coming from my code, so the signal is being trapped somewhere else. I will also, randomly, see messages identical to the above, but referencing any or all of signals 11 (SIGSEGV), 15 (SIGTERM), and 18 (SIGCONT)!

This code WAS working for a long time, and only recently stopped working, and I cannot figure out why. Why is my polling loop no longer catching the SIGTERM??

Regards,
Ray L.

Jagboy
Posts: 17
Joined: Thu Dec 28, 2017 5:19 pm

Re: Tearing My Hair Out Over Signals...

Thu Aug 09, 2018 2:55 pm

I have created a test app, using basically the same signal handling code as the real application:

Code: Select all

#include <assert.h>
#include <errno.h>
#include <poll.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <sys/signalfd.h>


/* bool, true and false are provided by <stdbool.h>. */

/* Number of descriptors handed to poll(); only the signalfd is watched. */
enum { NFDS = 1 };
 
/*
 * Stand-alone test of signalfd-based signal handling.
 *
 * Blocks SIGTERM, SIGHUP, SIGINT and SIGCONT, reads them from a signalfd and
 * prints one line per signal until SIGTERM is received.
 *
 * argc and argv are unused.
 * Returns 0 after SIGTERM, 1 if setup, poll() or read() fails.
 */
int main (int argc, char *argv[])
{
	(void)argc;
	(void)argv;

	/* We will handle SIGTERM, SIGHUP, SIGINT, SIGCONT */
	sigset_t mask;
	sigemptyset(&mask);
	sigaddset(&mask, SIGTERM);
	sigaddset(&mask, SIGHUP);
	sigaddset(&mask, SIGINT);
	sigaddset(&mask, SIGCONT);

	/* Block the signals that we handle using signalfd(), so they don't
	 * cause signal handlers or default signal actions to execute. */
	if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
	{
		/* perror() appends ": <reason>" and a newline by itself. */
		perror("Failed sigprocmask in main()");
		return 1;
	}

	/* Create a file descriptor from which we will read the signals. */
	int sfd = signalfd(-1, &mask, 0);
	if (sfd < 0)
	{
		printf("Failed creating signalfd file descriptor in main()\n");
		return 1;
	}

	int status = 0;
	int done = false;
	while (!done)
	{
		struct pollfd pfd[NFDS];

		pfd[0].fd = sfd;
		pfd[0].events = POLLIN | POLLERR | POLLHUP;
		pfd[0].revents = 0;

		/* Nothing else happens in this loop, so block until the signalfd
		 * is readable instead of waking up every millisecond. */
		int ret = poll(pfd, NFDS, -1);
		if (ret < 0)
		{
			if (errno == EINTR)
			{
				continue;	/* interrupted: simply retry */
			}
			perror("poll failed in main()");
			status = 1;
			break;
		}

		if (!(pfd[0].revents & POLLIN))
		{
			/* Only error conditions were reported; retrying would spin. */
			printf("Unexpected poll events 0x%x on signalfd\n", (unsigned)pfd[0].revents);
			status = 1;
			break;
		}

		/* We have a valid signal, read the info from the fd */
		struct signalfd_siginfo info;
		ssize_t res = read(sfd, &info, sizeof(info));
		if (res != (ssize_t)sizeof(info))
		{
			printf("Got Invalid result length from read of signalfd\n");
			status = 1;
			break;
		}

		unsigned sig = info.ssi_signo;
		unsigned user = info.ssi_uid;
		long pid = (long)getpid();
		long ppid = (long)getppid();

		switch (sig)
		{
			case SIGTERM:
				printf("Got SIGTERM - user=%u pid=%ld ppid=%ld\n", user, pid, ppid);
				done = true;
				break;

			case SIGCONT:
				printf("Got SIGCONT - user=%u pid=%ld ppid=%ld\n", user, pid, ppid);
				break;

			case SIGINT:
				printf("Got SIGINT - user=%u pid=%ld ppid=%ld\n", user, pid, ppid);
				break;

			case SIGSEGV:
				/* SIGSEGV is not in the mask above, so this case is only
				 * reached if it is added there. */
				printf("Got SIGSEGV - user=%u pid=%ld ppid=%ld\n", user, pid, ppid);
				break;

			case SIGHUP:
				printf("Got SIGHUP\n");
				break;

			default:
				printf("Got unhandled signal %u - user=%u pid=%ld ppid=%ld\n", sig, user, pid, ppid);
				break;
		}
	}

	/* Single exit path: the descriptor is released on success and on error. */
	close(sfd);
	return status;
}
This seems to work correctly, so I'm baffled why it does NOT work properly in the context of the full application. There was one stupid bug I missed (a missing else on the if (sig == SIGCONT)). It appears the systemctl stop does work... eventually. It can take several minutes for the application to terminate. Why so long? Until recently, it was always essentially instantaneous. Why is the signal not being seen almost instantly?

The behavior seems somewhat random. Here is what I get by sending various signals to the daemon:

SIGHUP: system-generated unhandled exception message, daemon does NOT terminate
SIGTERM: system-generated unhandled exception message, daemon does NOT terminate
SIGINT: Absolutely nothing
SIGCONT: MY SIGCONT message

Running out of hair quickly...

Regards,
Ray L.

Jagboy
Posts: 17
Joined: Thu Dec 28, 2017 5:19 pm

Re: Tearing My Hair Out Over Signals...

Fri Aug 10, 2018 3:03 am

I am now pretty well convinced my problem is that the above methods of handling signals are only robust for single-threaded applications, and my app is multi-threaded. So, I found what seems to be a pthread-based approach that uses a separate thread to handle the signals. This is encapsulated in the following example program, which works correctly:

Code: Select all

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <signal.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/signalfd.h>


#define true 1
#define false 0

/* Set of signals the handler thread waits for. main() fills it in before any
 * thread is created; signal_thread() only reads it. */
static sigset_t		signal_mask;

/* Shutdown request: set by signal_thread() on SIGTERM and polled by main().
 * It is written and read by different threads, so it is atomic; a plain int
 * here is a data race. */
_Atomic int Terminate = 0;

/*
 * Dedicated signal-handling thread.
 *
 * Waits synchronously with sigwait() for any signal in the file-scope
 * signal_mask and prints which one arrived. SIGTERM additionally sets the
 * global Terminate flag. The loop never exits.
 *
 * arg is unused.
 */
void *signal_thread(void *arg)
{
	(void)arg;

	while (1)
	{
		int       sig_caught = 0;    /* signal caught       */
		int       rc;                /* returned code       */

		printf("signal_thread waiting for signal...\n");
		rc = sigwait(&signal_mask, &sig_caught);
		if (rc != 0)
		{
			/* sig_caught holds no signal when sigwait() fails, so report
			 * the error and wait again instead of dispatching on it. */
			printf("sigwait returned %d\n", rc);
			continue;
		}

		switch (sig_caught)
		{
			case SIGTERM:
				printf("Caught SIGTERM. Exiting\n");
				Terminate = true;
				break;

			case SIGCONT:
				printf("Caught SIGCONT.\n");
				break;

			case SIGINT:
				printf("Caught SIGINT.\n");
				break;

			case SIGSEGV:
				printf("Caught SIGSEGV.\n");
				break;

			case SIGHUP:
				printf("Caught SIGHUP\n");
				break;

			default:
				printf("Caught unhandled signal %d\n", sig_caught);
				break;
		}
	}
}


/*
 * Busy-work thread body: about once per second, write "tick..." to stdout
 * and flush it. The argument is ignored and the loop never exits.
 */
void *dummy_thread(void *p)
{
	(void)p;

	for (;;)
	{
		usleep(1000*1000);
		fputs("tick...", stdout);
		fflush(stdout);
	}
}


/*
 * Demo entry point for thread-based signal handling.
 *
 * Blocks SIGINT, SIGTERM, SIGCONT and SIGHUP in the main thread, starts
 * signal_thread() to receive them, starts ten dummy_thread() workers, and
 * then waits until the Terminate flag is set.
 *
 * argc and argv are unused.
 * Returns 0 once Terminate is set; exits with status 1 if the signal mask
 * cannot be installed or a thread cannot be created.
 */
int main (int argc, char *argv[])
{
	pthread_t  	sig_thr_id;      /* signal handler thread ID */
	pthread_t  	worker_id;       /* most recently created worker thread */
	int        	rc;              /* return code              */

	(void)argc;
	(void)argv;

	/* The mask previously added SIGTERM twice and never SIGHUP, although
	 * signal_thread() has a SIGHUP case. */
	sigemptyset(&signal_mask);
	sigaddset(&signal_mask, SIGINT);
	sigaddset(&signal_mask, SIGTERM);
	sigaddset(&signal_mask, SIGCONT);
	sigaddset(&signal_mask, SIGHUP);
	rc = pthread_sigmask(SIG_BLOCK, &signal_mask, NULL);
	if (rc != 0)
	{
		printf("Failed in pthread_sigmask!\n");
		exit(1);
	}
	/* any newly created threads inherit the signal mask */

	rc = pthread_create(&sig_thr_id, NULL, signal_thread, NULL);
	if (rc != 0)
	{
		printf("Failed to create signal handler thread!\n");
		exit(1);
	}

	// Create a bunch of threads to do busy work. They get their own id
	// variable so the signal handler thread's id is not overwritten.
	for (int i=0; i<10; i++)
	{
		rc = pthread_create(&worker_id, NULL, dummy_thread, NULL);
		if (rc != 0)
		{
			printf("Failed to create dummy thread!\n");
			exit(1);
		}
		printf("Thread %d created...\n", i);
		usleep(100*1000);
	}

	/* Sleep between checks of the flag rather than spinning at full CPU. */
	while(!Terminate)
	{
		usleep(100*1000);
	}

	return 0;
}
BUT.... When I integrate the above method into MY multi-threaded app, it does NOT work. The signal handler never sees ANY signals, and ALL are handled by the system.

I truly do hope someone here has a MUCH better understanding of signal handling in multi-threaded applications than I do, and can help me figure out why the above approach does not work as desired. I am doing the signal masking, and launching the signal handler thread BEFORE creating any other threads, and my understanding is all threads SHOULD inherit the signal handler. But for some reason it does not seem to be working for me, no doubt because of some dumb mistake on my part...

Regards,
Ray L.

Jagboy
Posts: 17
Joined: Thu Dec 28, 2017 5:19 pm

Re: Tearing My Hair Out Over Signals...

Fri Aug 10, 2018 3:19 am

This is... odd... The new code does sort of, kinda, work... sometimes. If I send SIGTERM repeatedly, the first few are "missed" and handled by the system, printing the usual "Got unhandled signal xx" error message. But after 2-5 tries, my handler finally gets the signal, prints its message, and sets Terminate. BUT, it does NOT terminate until I send one MORE SIGTERM!

I am soooooo confused....

Regards,
Ray L.

photomankc
Posts: 80
Joined: Fri Aug 24, 2012 12:58 pm

Re: Tearing My Hair Out Over Signals...

Thu Sep 20, 2018 5:55 pm

I see this is quite old but in case it's still an issue:

I'm not an expert and I'm having a hard time following what all you have going on in those snips but this is code that is working for me for signals:

"g_*" are global variables.

Code: Select all


// Codes stored in g_sig_flag by signalHandler() and consumed by the main loop.
enum
{
  SIGNAL_NONE    = 0,  // nothing pending
  SIGNAL_USR1    = 1,  // SIGUSR1 was received
  SIGNAL_USR2    = 2,  // SIGUSR2 was received
  SIGNAL_LDCONF  = 3,  // SIGHUP was received
  SIGNAL_TERMPRG = 4   // SIGTERM, SIGQUIT or SIGINT was received
};

// Initialize signal handling for this process
void initSignalHandler()
{
  struct sigaction    newSigAct;

  // Setup signal handler to allow for common Unix signals to the process
  newSigAct.sa_handler = signalHandler;
  newSigAct.sa_flags = SA_RESTART;
  sigfillset(&newSigAct.sa_mask);

  // Setup the signals that this application will handle
  sigaction(SIGTERM, &newSigAct, 0);
  sigaction(SIGQUIT, &newSigAct, 0);
  sigaction(SIGINT,  &newSigAct, 0);
  sigaction(SIGHUP,  &newSigAct, 0);
  sigaction(SIGUSR1, &newSigAct, 0);
  sigaction(SIGUSR2, &newSigAct, 0);
}



// Handler for process signals sent to this daemon.
// Translates the delivered signal number into one of the SIGNAL_* codes and
// stores it in g_sig_flag. Any other signal number leaves g_sig_flag untouched.
void signalHandler(int signal)
{
  int code;

  switch (signal)
  {
    // Standard termination or quit signals:
    // perform an orderly shutdown and cleanup open files
    case SIGTERM:
    case SIGQUIT:
    case SIGINT:
      code = SIGNAL_TERMPRG;
      break;

    // Hangup signal:
    // reread the configuration file and reload variables
    case SIGHUP:
      code = SIGNAL_LDCONF;
      break;

    // User defined signals:
    // write / update data output file
    case SIGUSR1:
      code = SIGNAL_USR1;
      break;
    case SIGUSR2:
      code = SIGNAL_USR2;
      break;

    default:
      return;
  }

  g_sig_flag = code;
}


    
void main (void)
{
 
   initSignalHandler();
   
    // Somwhere in the main loop I have this setup to check the flags.  I could probably put this into
    // a function instead.
    
  while(g_loop_flag)
  {
  
    [snip]
    
    // Handle any signals that have been raised to the process.
      switch (g_sig_flag)
      {
        case SIGNAL_USR1:
          printf("SIGNAL_USR1");
          break;
        case SIGNAL_USR2:
          printf("SIGNAL_USR2");
          break;
        case SIGNAL_LDCONF:
          printf("SIGNAL_LDCONF");
          break;
        case SIGNAL_TERMPRG:
          printf("SIGNAL_TERMPRG");
          g_loop_flag = false;
          break;
      }
      g_sig_flag = SIGNAL_NONE;

    [snip]
    
  }
  
  // Cleanup any stuff I have open when the while loop exits.   
  
}


I just put the basics like this into a skeleton file I use to start new Linux CLI applications. Hope that helps.

Return to “C/C++”

Who is online

Users browsing this forum: No registered users and 7 guests