Blame SOURCES/at-3.1.13-aborted-jobs.patch

4e3b3d
diff -up at-3.1.13/atd.c.aborted at-3.1.13/atd.c
4e3b3d
--- at-3.1.13/atd.c.aborted	2016-04-22 13:30:58.563029540 +0200
4e3b3d
+++ at-3.1.13/atd.c	2017-09-14 16:00:38.109011916 +0200
4e3b3d
@@ -74,6 +74,9 @@
4e3b3d
 #include <syslog.h>
4e3b3d
 #endif
4e3b3d
 
4e3b3d
+#include <sys/file.h>
4e3b3d
+#include <utime.h>
4e3b3d
+
4e3b3d
 /* Local headers */
4e3b3d
 
4e3b3d
 #include "privs.h"
4e3b3d
@@ -285,7 +288,7 @@ run_file(const char *filename, uid_t uid
4e3b3d
  * mail to the user.
4e3b3d
  */
4e3b3d
     pid_t pid;
4e3b3d
-    int fd_out, fd_in;
4e3b3d
+    int fd_out, fd_in, fd_std;
4e3b3d
     char jobbuf[9];
4e3b3d
     char *mailname = NULL;
4e3b3d
     int mailsize = 128;
4e3b3d
@@ -404,6 +407,10 @@ run_file(const char *filename, uid_t uid
4e3b3d
 
4e3b3d
     fcntl(fd_in, F_SETFD, fflags & ~FD_CLOEXEC);
4e3b3d
 
4e3b3d
+    if (flock(fd_in, LOCK_EX | LOCK_NB) != 0)
4e3b3d
+	    perr("Somebody already locked the job %8lu (%.500s) - "
4e3b3d
+	     "aborting", jobno, filename);
4e3b3d
+
4e3b3d
     /*
4e3b3d
      * If the spool directory is mounted via NFS `atd' isn't able to
4e3b3d
      * read from the job file and will bump out here.  The file is
4e3b3d
@@ -563,10 +570,7 @@ run_file(const char *filename, uid_t uid
4e3b3d
 	PRIV_END
4e3b3d
     }
4e3b3d
     /* We're the parent.  Let's wait.
4e3b3d
-     */
4e3b3d
-    close(fd_in);
4e3b3d
-
4e3b3d
-    /* We inherited the master's SIGCHLD handler, which does a
4e3b3d
+       We inherited the master's SIGCHLD handler, which does a
4e3b3d
        non-blocking waitpid. So this blocking one will eventually
4e3b3d
        return with an ECHILD error. 
4e3b3d
      */
4e3b3d
@@ -583,14 +587,14 @@ run_file(const char *filename, uid_t uid
4e3b3d
     /* some sendmail implementations are confused if stdout, stderr are
4e3b3d
      * not available, so let them point to /dev/null
4e3b3d
      */
4e3b3d
-    if ((fd_in = open("/dev/null", O_WRONLY)) < 0)
4e3b3d
+    if ((fd_std = open("/dev/null", O_WRONLY)) < 0)
4e3b3d
 	perr("Could not open /dev/null.");
4e3b3d
-    if (dup2(fd_in, STDOUT_FILENO) < 0)
4e3b3d
+    if (dup2(fd_std, STDOUT_FILENO) < 0)
4e3b3d
 	perr("Could not use /dev/null as standard output.");
4e3b3d
-    if (dup2(fd_in, STDERR_FILENO) < 0)
4e3b3d
+    if (dup2(fd_std, STDERR_FILENO) < 0)
4e3b3d
 	perr("Could not use /dev/null as standard error.");
4e3b3d
-    if (fd_in != STDOUT_FILENO && fd_in != STDERR_FILENO)
4e3b3d
-	close(fd_in);
4e3b3d
+    if (fd_std != STDOUT_FILENO && fd_std != STDERR_FILENO)
4e3b3d
+	close(fd_std);
4e3b3d
 
4e3b3d
     if (unlink(filename) == -1)
4e3b3d
         syslog(LOG_WARNING, "Warning: removing output file for job %li failed: %s",
4e3b3d
@@ -598,7 +602,12 @@ run_file(const char *filename, uid_t uid
4e3b3d
 
4e3b3d
     /* The job is now finished.  We can delete its input file.
4e3b3d
      */
4e3b3d
-    chdir(ATJOB_DIR);
4e3b3d
+    if (chdir(ATJOB_DIR) != 0)
4e3b3d
+	perr("Somebody removed %s directory from under us.", ATJOB_DIR);
4e3b3d
+
4e3b3d
+    /* This also removes the flock */
4e3b3d
+    (void)close(fd_in);
4e3b3d
+
4e3b3d
     unlink(newname);
4e3b3d
     free(newname);
4e3b3d
 
4e3b3d
@@ -642,7 +651,7 @@ run_file(const char *filename, uid_t uid
4e3b3d
 	PRIV_END
4e3b3d
    }
4e3b3d
    else if ( mail_pid == -1 ) {
4e3b3d
-           perr("fork of mailer failed");
4e3b3d
+           syslog(LOG_ERR, "fork of mailer failed: %m");
4e3b3d
    }
4e3b3d
    else {
4e3b3d
            /* Parent */
4e3b3d
@@ -738,8 +747,16 @@ run_loop()
4e3b3d
 	/* Skip lock files */
4e3b3d
 	if (queue == '=') {
4e3b3d
 	    if ((buf.st_nlink == 1) && (run_time + CHECK_INTERVAL <= now)) {
4e3b3d
-		/* Remove stale lockfile FIXME: lock the lockfile, if you fail, it's still in use. */
4e3b3d
-		unlink(dirent->d_name);
4e3b3d
+		int fd;
4e3b3d
+
4e3b3d
+		fd = open(dirent->d_name, O_RDONLY);
4e3b3d
+		if (fd != -1) {
4e3b3d
+			if (flock(fd, LOCK_EX | LOCK_NB) == 0) {
4e3b3d
+				unlink(dirent->d_name);
4e3b3d
+				syslog(LOG_NOTICE, "removing stale lock file %s\n", dirent->d_name);
4e3b3d
+			}
4e3b3d
+			(void)close(fd);
4e3b3d
+		}
4e3b3d
 	    }
4e3b3d
 	    continue;
4e3b3d
 	}
4e3b3d
@@ -752,12 +769,17 @@ run_loop()
4e3b3d
 	/* Is the file already locked?
4e3b3d
 	 */
4e3b3d
 	if (buf.st_nlink > 1) {
4e3b3d
+	    if (run_time < buf.st_mtime)
4e3b3d
+		run_time = buf.st_mtime;
4e3b3d
 	    if (run_time + CHECK_INTERVAL <= now) {
4e3b3d
-
4e3b3d
 		/* Something went wrong the last time this was executed.
4e3b3d
 		 * Let's remove the lockfile and reschedule.
4e3b3d
+		 * We also change the timestamp to avoid rerunning the job more
4e3b3d
+		 * than once every CHECK_INTERVAL.
4e3b3d
 		 */
4e3b3d
 		strncpy(lock_name, dirent->d_name, sizeof(lock_name));
4e3b3d
 		lock_name[sizeof(lock_name)-1] = '\0';
4e3b3d
+		if (utime(lock_name, 0) < 0)
4e3b3d
+			syslog(LOG_ERR, "utime couldn't be set for lock file %s\n", lock_name);
4e3b3d
 		lock_name[0] = '=';
4e3b3d
 		unlink(lock_name);