blob: 52fee7d13f15d33f73a163d4488bcb74a80e0eb2 [file] [log] [blame]
#include <errno.h>
#include <fcntl.h>
#include <linux/watchdog.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/select.h>
#include <sys/poll.h>
#include <sys/signalfd.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include "igt_core.h"
#include "executor.h"
#include "output_strings.h"
/*
 * File-scope registry of the watchdog devices opened by init_watchdogs().
 * close_watchdogs() frees the array and resets both members.
 */
static struct {
/* Heap array of open watchdog file descriptors, grown with realloc(). */
int *fds;
/* Number of entries in fds. */
size_t num_dogs;
} watchdogs;
/*
 * Stop and close a single watchdog device.
 *
 * A single 'V' character is written to the device before it is closed;
 * a failed write is reported on stderr but the descriptor is closed
 * regardless.
 */
static void __close_watchdog(int fd)
{
	if (write(fd, "V", 1) == -1) {
		fprintf(stderr, "Failed to stop a watchdog: %s\n",
			strerror(errno));
	}

	close(fd);
}
/*
 * Stop and close every registered watchdog and empty the registry.
 *
 * settings may be NULL, which is how the atexit hook calls this; in that
 * case a warning is printed if any watchdog was still open. With non-NULL
 * settings a note is printed only at verbose log level.
 */
static void close_watchdogs(struct settings *settings)
{
	size_t idx = 0;

	if (settings == NULL) {
		if (watchdogs.num_dogs != 0)
			fprintf(stderr, "Closing watchdogs from exit handler!\n");
	} else if (settings->log_level >= LOG_LEVEL_VERBOSE) {
		printf("Closing watchdogs\n");
	}

	while (idx < watchdogs.num_dogs)
		__close_watchdog(watchdogs.fds[idx++]);

	free(watchdogs.fds);
	watchdogs.fds = NULL;
	watchdogs.num_dogs = 0;
}
/*
 * atexit() hook registered by init_watchdogs(): closes whatever watchdogs
 * are still open. No settings are available here, so NULL is passed.
 */
static void close_watchdogs_atexit(void)
{
	struct settings *no_settings = NULL;

	close_watchdogs(no_settings);
}
/*
 * Reset the watchdog registry and, if watchdogs are enabled and an
 * inactivity timeout is configured, open /dev/watchdog0, /dev/watchdog1,
 * ... until the first device that cannot be opened.
 *
 * Also registers close_watchdogs_atexit() so the devices get closed when
 * the process exits.
 */
static void init_watchdogs(struct settings *settings)
{
	const int verbose = settings->log_level >= LOG_LEVEL_VERBOSE;
	char path[32];
	int index = 0;

	memset(&watchdogs, 0, sizeof(watchdogs));

	if (!settings->use_watchdog || settings->inactivity_timeout <= 0)
		return;

	if (verbose)
		printf("Initializing watchdogs\n");

	atexit(close_watchdogs_atexit);

	while (1) {
		int wdfd;

		snprintf(path, sizeof(path), "/dev/watchdog%d", index);
		wdfd = open(path, O_RDWR | O_CLOEXEC);
		if (wdfd < 0)
			break;

		watchdogs.num_dogs++;
		watchdogs.fds = realloc(watchdogs.fds, watchdogs.num_dogs * sizeof(int));
		watchdogs.fds[index] = wdfd;

		if (verbose)
			printf(" %s\n", path);

		index++;
	}
}
/*
 * Apply the same timeout to every registered watchdog.
 *
 * WDIOC_SETTIMEOUT writes the value actually in effect back into
 * 'timeout'. When a device reports a smaller value than requested, the
 * whole pass restarts with that smaller value so all devices end up with
 * a common timeout. A device whose ioctl fails is closed and its slot is
 * set to -1.
 *
 * Returns the timeout finally in effect.
 */
static int watchdogs_set_timeout(int timeout)
{
	int lowered;

	do {
		const int requested = timeout;
		size_t idx;

		lowered = 0;
		for (idx = 0; idx < watchdogs.num_dogs; idx++) {
			if (ioctl(watchdogs.fds[idx], WDIOC_SETTIMEOUT, &timeout) != 0) {
				__close_watchdog(watchdogs.fds[idx]);
				watchdogs.fds[idx] = -1;
				continue;
			}

			if (timeout < requested) {
				/*
				 * Timeout of this caliber refused. Start over
				 * with the value the device accepted.
				 */
				lowered = 1;
				break;
			}
		}
	} while (lowered);

	return timeout;
}
/*
 * Send a keepalive (WDIOC_KEEPALIVE) to every registered watchdog.
 * Failures are reported on stderr and otherwise ignored.
 */
static void ping_watchdogs(void)
{
	size_t idx = 0;

	while (idx < watchdogs.num_dogs) {
		if (ioctl(watchdogs.fds[idx], WDIOC_KEEPALIVE, NULL) == -1) {
			fprintf(stderr, "Failed to ping a watchdog: %s\n",
				strerror(errno));
		}
		idx++;
	}
}
/*
 * Abort-condition handler for lockdep.
 *
 * Reads /proc/lockdep_stats and looks for the "debug_locks:" line. If the
 * value on that line parses and is not 1, returns a heap-allocated
 * message (header plus the file contents read) that the caller must
 * free(). Returns NULL when the file cannot be opened or read, the line
 * is missing, or the value is 1.
 */
static char *handle_lockdep(void)
{
	const char *header = "Lockdep not active\n\n/proc/lockdep_stats contents:\n";
	const char *debug_locks_line = " debug_locks:";
	const size_t headerlen = strlen(header);
	char buf[4096], *p;
	ssize_t len;
	int val;
	int fd = open("/proc/lockdep_stats", O_RDONLY);

	if (fd < 0)
		return NULL;

	strcpy(buf, header);

	/* Leave room for the terminating NUL after the file contents. */
	len = read(fd, buf + headerlen, sizeof(buf) - headerlen - 1);

	/* Close before checking the result so a failed read cannot leak fd. */
	close(fd);

	if (len < 0)
		return NULL;

	buf[headerlen + len] = '\0';

	p = strstr(buf, debug_locks_line);
	if (p != NULL &&
	    sscanf(p + strlen(debug_locks_line), "%d", &val) == 1 &&
	    val != 1) {
		return strdup(buf);
	}

	return NULL;
}
/*
 * Kernel taint bits that are treated as abort conditions, each with a
 * human-readable explanation used in the abort message.
 *
 * For the bit definitions, see Linux's include/linux/kernel.h.
 * The table is terminated by an entry whose bit is 0.
 */
static const struct {
unsigned long bit;
const char *explanation;
} abort_taints[] = {
{(1 << 5), "TAINT_BAD_PAGE: Bad page reference or an unexpected page flags."},
{(1 << 7), "TAINT_DIE: Kernel has died - BUG/OOPS."},
{(1 << 9), "TAINT_WARN: WARN_ON has happened."},
{0, 0}};
static unsigned long tainted(unsigned long *taints)
{
FILE *f;
unsigned long bad_taints = 0;
for (typeof(*abort_taints) *taint = abort_taints; taint->bit; taint++)
bad_taints |= taint->bit;
*taints = 0;
f = fopen("/proc/sys/kernel/tainted", "r");
if (f) {
fscanf(f, "%lu", taints);
fclose(f);
}
return *taints & bad_taints;
}
static char *handle_taint(void)
{
unsigned long taints;
char *reason;
if (!tainted(&taints))
return NULL;
asprintf(&reason, "Kernel badly tainted (%#lx) (check dmesg for details):\n",
taints);
for (typeof(*abort_taints) *taint = abort_taints; taint->bit; taint++) {
if (taint->bit & taints) {
char *old_reason = reason;
asprintf(&reason, "%s\t(%#lx) %s\n",
old_reason,
taint->bit,
taint->explanation);
free(old_reason);
}
}
return reason;
}
/*
 * Maps each abort condition flag to the function that checks for it.
 * A handler returns a heap-allocated reason string when the condition
 * holds, or NULL otherwise. need_to_abort() walks this table in order.
 * The table is terminated by an entry whose condition is 0.
 */
static const struct {
int condition;
char *(*handler)(void);
} abort_handlers[] = {
{ ABORT_LOCKDEP, handle_lockdep },
{ ABORT_TAINT, handle_taint },
{ 0, 0 },
};
static char *need_to_abort(const struct settings* settings)
{
typeof(*abort_handlers) *it;
for (it = abort_handlers; it->condition; it++) {
char *abort;
if (!(settings->abort_mask & it->condition))
continue;
abort = it->handler();
if (!abort)
continue;
if (settings->log_level >= LOG_LEVEL_NORMAL)
fprintf(stderr, "Aborting: %s\n", abort);
return abort;
}
return NULL;
}
/*
 * Exclude one subtest from an entry by appending "!<subtest>" to the
 * entry's subtest list.
 *
 * Subtest pruning is done by adding exclusion strings to the subtest
 * list. The last matching item on the subtest selection command line
 * flag decides whether to run a subtest, see igt_core.c for details. If
 * the list is empty, the expected subtest set is unknown, so "*" is
 * added first so there is something to exclude from.
 */
static void prune_subtest(struct job_list_entry *entry, char *subtest)
{
	const size_t namelen = strlen(subtest);
	char *exclusion;

	if (entry->subtest_count == 0) {
		entry->subtest_count = 1;
		entry->subtests = realloc(entry->subtests,
					  entry->subtest_count * sizeof(*entry->subtests));
		entry->subtests[0] = strdup("*");
	}

	/* '!' + name + NUL */
	exclusion = malloc(namelen + 2);
	exclusion[0] = '!';
	memcpy(exclusion + 1, subtest, namelen + 1);

	entry->subtests = realloc(entry->subtests,
				  (entry->subtest_count + 1) * sizeof(*entry->subtests));
	entry->subtests[entry->subtest_count] = exclusion;
	entry->subtest_count++;
}
/*
 * Read a test's journal and exclude every subtest it lists from 'entry'.
 *
 * Each journal line is a subtest that has been started, or the line
 * 'exit:$exitcode (time)', or 'timeout:$exitcode (time)'. An exit line
 * means the binary ran to completion, which is recorded by emptying
 * entry->binary. The same marking is applied when the entry had an
 * explicit subtest list and at least that many subtests were pruned.
 *
 * The stream created from fd is closed with fclose() before returning.
 * Returns true if at least one subtest was pruned; false otherwise,
 * including when the fd cannot be turned into a stream.
 */
static bool prune_from_journal(struct job_list_entry *entry, int fd)
{
	const size_t wanted = entry->subtest_count;
	size_t pruned = 0;
	char *word;
	FILE *journal;

	journal = fdopen(fd, "r");
	if (journal == NULL)
		return false;

	while (fscanf(journal, "%ms", &word) == 1) {
		const int is_exit =
			!strncmp(word, EXECUTOR_EXIT, strlen(EXECUTOR_EXIT));
		const int is_timeout = !is_exit &&
			!strncmp(word, EXECUTOR_TIMEOUT, strlen(EXECUTOR_TIMEOUT));

		if (is_exit || is_timeout) {
			/* Skip the trailing " (<seconds>s)" part of the line. */
			fscanf(journal, " (%*fs)");

			/* Fully done. Mark that by making the binary name invalid. */
			if (is_exit)
				entry->binary[0] = '\0';
		} else {
			prune_subtest(entry, word);
			pruned++;
		}

		free(word);
	}

	fclose(journal);

	/*
	 * If we know the subtests we originally wanted to run, check
	 * if we got an equal amount already.
	 */
	if (wanted > 0 && pruned >= wanted)
		entry->binary[0] = '\0';

	return pruned > 0;
}
/*
 * Name of each per-test output file inside a test's result directory,
 * indexed by the _F_* constants. Used both for opening the files and for
 * deleting old results.
 */
static const char *filenames[_F_LAST] = {
[_F_JOURNAL] = "journal.txt",
[_F_OUT] = "out.txt",
[_F_ERR] = "err.txt",
[_F_DMESG] = "dmesg.txt",
};
/*
 * Open (creating if needed) a file for appending output.
 *
 * If the file already has content that does not end in a newline, one is
 * written first so new output starts on a fresh line. The file offset is
 * left at the end of the file.
 *
 * Returns the file descriptor, or a negative value if openat() fails.
 */
static int open_at_end(int dirfd, const char *name)
{
	char tail;
	int fd = openat(dirfd, name, O_RDWR | O_CREAT | O_CLOEXEC, 0666);

	if (fd < 0)
		return fd;

	/* Peek at the final byte; an empty file fails the seek and is skipped. */
	if (lseek(fd, -1, SEEK_END) >= 0) {
		if (read(fd, &tail, 1) == 1 && tail != '\n')
			write(fd, "\n", 1);
	}

	lseek(fd, 0, SEEK_END);

	return fd;
}
/*
 * Open an existing file read-only, relative to dirfd.
 * Returns the file descriptor, or a negative value on failure.
 */
static int open_for_reading(int dirfd, const char *name)
{
	const int flags = O_RDONLY;

	return openat(dirfd, name, flags);
}
/*
 * Open all per-test output files in the directory referred to by dirfd.
 *
 * fds must have room for _F_LAST descriptors and is filled in the order
 * of the filenames table. With 'write' set the files are created if
 * necessary and positioned at their end; otherwise they are opened
 * read-only.
 *
 * Returns true on success. On failure the files opened so far are closed
 * again and false is returned.
 */
bool open_output_files(int dirfd, int *fds, bool write)
{
	int (*opener)(int, const char*);
	int idx;

	if (write)
		opener = open_at_end;
	else
		opener = open_for_reading;

	for (idx = 0; idx < _F_LAST; idx++) {
		fds[idx] = opener(dirfd, filenames[idx]);
		if (fds[idx] >= 0)
			continue;

		/* Roll back: close everything opened before the failure. */
		for (idx--; idx >= 0; idx--)
			close(fds[idx]);

		return false;
	}

	return true;
}
/*
 * Close all _F_LAST output file descriptors in fds.
 */
void close_outputs(int *fds)
{
	int *cursor = fds;
	int *const end = fds + _F_LAST;

	while (cursor < end) {
		close(*cursor);
		cursor++;
	}
}
/*
 * Copy the kernel log records still pending on kmsgfd to outfd.
 *
 * Write kernel messages to the log file until we reach 'now'.
 * Unfortunately, /dev/kmsg doesn't support seeking to -1 from SEEK_END so
 * we need to use a second fd to read a message to match against, or stop
 * when we reach EAGAIN.
 *
 * kmsgfd is switched to non-blocking mode. Nothing is copied if the
 * second /dev/kmsg descriptor cannot be opened or the mode switch fails.
 */
static void dump_dmesg(int kmsgfd, int outfd)
{
	int comparefd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK);
	unsigned flags;
	unsigned long long seq, cmpseq, usec;
	char cont;
	/* One spare byte so every record can be NUL-terminated for sscanf(). */
	char buf[2048 + 1];
	ssize_t r;

	if (comparefd < 0)
		return;
	lseek(comparefd, 0, SEEK_END);

	if (fcntl(kmsgfd, F_SETFL, O_NONBLOCK)) {
		close(comparefd);
		return;
	}

	while (1) {
		if (comparefd >= 0) {
			r = read(comparefd, buf, sizeof(buf) - 1);
			if (r < 0) {
				if (errno != EAGAIN && errno != EPIPE) {
					close(comparefd);
					return;
				}
			} else {
				buf[r] = '\0';
				if (sscanf(buf, "%u,%llu,%llu,%c;",
					   &flags, &cmpseq, &usec, &cont) == 4) {
					/* Reading comparison record done. */
					close(comparefd);
					comparefd = -1;
				}
			}
		}

		r = read(kmsgfd, buf, sizeof(buf) - 1);

		/*
		 * EPIPE is retried. errno is only meaningful when read()
		 * actually failed, so it is not consulted for r == 0.
		 */
		if (r < 0 && errno == EPIPE)
			continue;

		if (r <= 0) {
			/*
			 * If EAGAIN, we're done. If some other error,
			 * we can't do anything anyway.
			 */
			if (comparefd >= 0)
				close(comparefd);
			return;
		}

		write(outfd, buf, r);

		/* Terminate the record before parsing its header. */
		buf[r] = '\0';

		if (comparefd < 0 && sscanf(buf, "%u,%llu,%llu,%c;",
					    &flags, &seq, &usec, &cont) == 4) {
			/*
			 * Comparison record has been read, compare
			 * the sequence number to see if we have read
			 * enough.
			 */
			if (seq >= cmpseq)
				return;
		}
	}
}
/*
 * Send the signal to the child's process group and to the child itself.
 *
 * Returns false (after printing an error) only when signalling the child
 * directly fails with ESRCH, i.e. the process does not exist. Any other
 * outcome returns true.
 */
static bool kill_child(int sig, pid_t child)
{
	const pid_t group = -child;

	kill(group, sig);

	if (kill(child, sig) == 0)
		return true;

	if (errno != ESRCH)
		return true;

	fprintf(stderr, "Child process does not exist. This shouldn't happen.\n");
	return false;
}
/*
 * Monitor a running test: copy its stdout/stderr and the kernel log to
 * the output files, keep the journal of started subtests, keep the
 * watchdogs fed, and kill the test when it is silent for longer than the
 * inactivity timeout.
 *
 * child      - pid of the test process
 * outfd      - read end of the test's stdout pipe
 * errfd      - read end of the test's stderr pipe
 * kmsgfd     - /dev/kmsg descriptor, or negative if unavailable
 * sigfd      - signalfd delivering SIGCHLD and the termination signals
 * outputs    - output file descriptors indexed by _F_*
 * time_spent - if non-NULL, receives the test's runtime in seconds
 * settings   - runner settings (timeouts, log level, sync)
 *
 * On the normal exit path outfd, errfd and kmsgfd are closed here.
 *
 * Returns:
 *  =0 - Success
 *  <0 - Failure executing
 *  >0 - Timeout happened, need to recreate from journal
 *       (the value is the signal the test was killed with)
 */
static int monitor_output(pid_t child,
			  int outfd, int errfd, int kmsgfd, int sigfd,
			  int *outputs,
			  double *time_spent,
			  struct settings *settings)
{
	fd_set set;
	char buf[2048];
	char *outbuf = NULL;
	size_t outbufsize = 0;
	char current_subtest[256] = { 0 };
	struct signalfd_siginfo siginfo;
	ssize_t s;
	int n, status;
	int nfds = outfd;
	int timeout = settings->inactivity_timeout;
	int timeout_intervals = 1, intervals_left;
	int wd_extra = 10;
	int killed = 0; /* 0 if not killed, signal number otherwise */
	struct timespec time_beg, time_end;
	unsigned long taints = 0;
	bool aborting = false;

	igt_gettime(&time_beg);

	/* select() needs the highest descriptor number plus one. */
	if (errfd > nfds)
		nfds = errfd;
	if (kmsgfd > nfds)
		nfds = kmsgfd;
	if (sigfd > nfds)
		nfds = sigfd;
	nfds++;

	if (timeout > 0) {
		/*
		 * Use original timeout plus some leeway. If we're still
		 * alive, we want to kill the test process instead of cutting
		 * power.
		 */
		int wd_timeout = watchdogs_set_timeout(timeout + wd_extra);

		if (wd_timeout < timeout + wd_extra) {
			int step;

			/* Watchdog timeout smaller, so ping it more often */
			if (wd_timeout - wd_extra <= 0)
				wd_extra = wd_timeout / 2;

			/*
			 * Both the step and the interval count are clamped
			 * to at least 1 so neither division below can
			 * divide by zero.
			 */
			step = wd_timeout - wd_extra;
			if (step < 1)
				step = 1;

			timeout_intervals = timeout / step;
			if (timeout_intervals < 1)
				timeout_intervals = 1;

			timeout /= timeout_intervals;

			if (settings->log_level >= LOG_LEVEL_VERBOSE) {
				printf("Watchdog doesn't support the timeout we requested (shortened to %d seconds).\n"
				       "Using %d intervals of %d seconds.\n",
				       wd_timeout, timeout_intervals, timeout);
			}
		}
	}

	intervals_left = timeout_intervals;

	while (outfd >= 0 || errfd >= 0 || sigfd >= 0) {
		struct timeval tv = { .tv_sec = timeout };

		FD_ZERO(&set);
		if (outfd >= 0)
			FD_SET(outfd, &set);
		if (errfd >= 0)
			FD_SET(errfd, &set);
		if (kmsgfd >= 0)
			FD_SET(kmsgfd, &set);
		if (sigfd >= 0)
			FD_SET(sigfd, &set);

		/* A timeout of 0 means "no inactivity timeout": block forever. */
		n = select(nfds, &set, NULL, NULL, timeout == 0 ? NULL : &tv);
		if (n < 0) {
			/* TODO */
			return -1;
		}

		if (n == 0) {
			/* Nothing happened during this interval. */
			if (--intervals_left)
				continue;

			ping_watchdogs();

			switch (killed) {
			case 0:
				if (settings->log_level >= LOG_LEVEL_NORMAL) {
					printf("Timeout. Killing the current test with SIGQUIT.\n");
					fflush(stdout);
				}

				killed = SIGQUIT;
				if (!kill_child(killed, child))
					return -1;

				/*
				 * Now continue the loop and let the
				 * dying child be handled normally.
				 */
				timeout = 20;
				watchdogs_set_timeout(120);
				intervals_left = timeout_intervals = 1;
				break;
			case SIGQUIT:
				if (settings->log_level >= LOG_LEVEL_NORMAL) {
					printf("Timeout. Killing the current test with SIGKILL.\n");
					fflush(stdout);
				}

				killed = SIGKILL;
				if (!kill_child(killed, child))
					return -1;

				intervals_left = timeout_intervals = 1;
				break;
			case SIGKILL:
				/*
				 * If the child still exists, and the kernel
				 * hasn't oopsed, assume it is still making
				 * forward progress towards exiting (i.e. still
				 * freeing all of its resources).
				 */
				if (kill(child, 0) == 0 && !tainted(&taints)) {
					intervals_left = 1;
					break;
				}

				/* Nothing that can be done, really. Let's tell the caller we want to abort. */
				if (settings->log_level >= LOG_LEVEL_NORMAL) {
					fprintf(stderr, "Child refuses to die, tainted %lx. Aborting.\n",
						taints);
				}
				close_watchdogs(settings);
				free(outbuf);
				close(outfd);
				close(errfd);
				close(kmsgfd);
				return -1;
			}

			continue;
		}

		/* There was activity: restart the inactivity countdown. */
		intervals_left = timeout_intervals;
		ping_watchdogs();

		/* TODO: Refactor these handlers to their own functions */
		if (outfd >= 0 && FD_ISSET(outfd, &set)) {
			char *newline;

			s = read(outfd, buf, sizeof(buf));
			if (s <= 0) {
				if (s < 0) {
					fprintf(stderr, "Error reading test's stdout: %s\n",
						strerror(errno));
				}

				close(outfd);
				outfd = -1;
				goto out_end;
			}

			write(outputs[_F_OUT], buf, s);
			if (settings->sync) {
				fdatasync(outputs[_F_OUT]);
			}

			/* Accumulate output until complete lines can be parsed. */
			outbuf = realloc(outbuf, outbufsize + s);
			memcpy(outbuf + outbufsize, buf, s);
			outbufsize += s;

			while ((newline = memchr(outbuf, '\n', outbufsize)) != NULL) {
				size_t linelen = newline - outbuf + 1;

				if (linelen > strlen(STARTING_SUBTEST) &&
				    !memcmp(outbuf, STARTING_SUBTEST, strlen(STARTING_SUBTEST))) {
					size_t namelen = linelen - strlen(STARTING_SUBTEST);

					write(outputs[_F_JOURNAL], outbuf + strlen(STARTING_SUBTEST),
					      namelen);

					/*
					 * Remember the subtest name (with its
					 * newline), truncated if it does not
					 * fit in current_subtest.
					 */
					if (namelen >= sizeof(current_subtest))
						namelen = sizeof(current_subtest) - 1;
					memcpy(current_subtest, outbuf + strlen(STARTING_SUBTEST),
					       namelen);
					current_subtest[namelen] = '\0';

					if (settings->log_level >= LOG_LEVEL_VERBOSE) {
						fwrite(outbuf, 1, linelen, stdout);
					}
				}
				if (linelen > strlen(SUBTEST_RESULT) &&
				    !memcmp(outbuf, SUBTEST_RESULT, strlen(SUBTEST_RESULT))) {
					char *delim = memchr(outbuf, ':', linelen);

					if (delim != NULL) {
						size_t subtestlen = delim - outbuf - strlen(SUBTEST_RESULT);
						/* Never compare past the end of current_subtest. */
						size_t cmplen = subtestlen < sizeof(current_subtest) - 1 ?
							subtestlen : sizeof(current_subtest) - 1;

						if (memcmp(current_subtest, outbuf + strlen(SUBTEST_RESULT),
							   cmplen)) {
							/* Result for a test that didn't ever start */
							write(outputs[_F_JOURNAL],
							      outbuf + strlen(SUBTEST_RESULT),
							      subtestlen);
							write(outputs[_F_JOURNAL], "\n", 1);
							if (settings->sync) {
								fdatasync(outputs[_F_JOURNAL]);
							}
							current_subtest[0] = '\0';
						}

						if (settings->log_level >= LOG_LEVEL_VERBOSE) {
							fwrite(outbuf, 1, linelen, stdout);
						}
					}
				}

				/* Drop the consumed line from the front of the buffer. */
				memmove(outbuf, newline + 1, outbufsize - linelen);
				outbufsize -= linelen;
			}
		}
	out_end:

		if (errfd >= 0 && FD_ISSET(errfd, &set)) {
			s = read(errfd, buf, sizeof(buf));
			if (s <= 0) {
				if (s < 0) {
					fprintf(stderr, "Error reading test's stderr: %s\n",
						strerror(errno));
				}
				close(errfd);
				errfd = -1;
			} else {
				write(outputs[_F_ERR], buf, s);
				if (settings->sync) {
					fdatasync(outputs[_F_ERR]);
				}
			}
		}

		if (kmsgfd >= 0 && FD_ISSET(kmsgfd, &set)) {
			s = read(kmsgfd, buf, sizeof(buf));
			if (s < 0) {
				if (errno != EPIPE && errno != EINVAL) {
					fprintf(stderr, "Error reading from kmsg, stopping monitoring: %s\n",
						strerror(errno));
					close(kmsgfd);
					kmsgfd = -1;
				} else if (errno == EINVAL) {
					fprintf(stderr, "Warning: Buffer too small for kernel log record, record lost.\n");
				}
			} else {
				write(outputs[_F_DMESG], buf, s);
				if (settings->sync) {
					fdatasync(outputs[_F_DMESG]);
				}
			}
		}

		if (sigfd >= 0 && FD_ISSET(sigfd, &set)) {
			double elapsed;

			s = read(sigfd, &siginfo, sizeof(siginfo));
			if (s < 0) {
				fprintf(stderr, "Error reading from signalfd: %s\n",
					strerror(errno));
				continue;
			} else if (siginfo.ssi_signo == SIGCHLD) {
				if (child != waitpid(child, &status, WNOHANG)) {
					fprintf(stderr, "Failed to reap child\n");
					status = 9999;
				} else if (WIFEXITED(status)) {
					status = WEXITSTATUS(status);
					/* Exit codes of 128 and above are recorded as negative numbers. */
					if (status >= 128) {
						status = 128 - status;
					}
				} else if (WIFSIGNALED(status)) {
					status = -WTERMSIG(status);
				} else {
					status = 9999;
				}
			} else {
				/* We're dying, so we're taking them with us */
				if (settings->log_level >= LOG_LEVEL_NORMAL)
					printf("Abort requested via %s, terminating children\n",
					       strsignal(siginfo.ssi_signo));

				aborting = true;
				timeout = 2;
				killed = SIGQUIT;
				if (!kill_child(killed, child))
					return -1;

				continue;
			}

			igt_gettime(&time_end);

			elapsed = igt_time_elapsed(&time_beg, &time_end);
			if (elapsed < 0.0)
				elapsed = 0.0;

			if (!aborting) {
				dprintf(outputs[_F_JOURNAL], "%s%d (%.3fs)\n",
					killed ? EXECUTOR_TIMEOUT : EXECUTOR_EXIT,
					status, elapsed);
				if (settings->sync) {
					fdatasync(outputs[_F_JOURNAL]);
				}

				if (time_spent)
					*time_spent = elapsed;
			}

			child = 0;
			sigfd = -1; /* we are dying, no signal handling for now */
		}
	}

	dump_dmesg(kmsgfd, outputs[_F_DMESG]);
	if (settings->sync)
		fdatasync(outputs[_F_DMESG]);

	free(outbuf);
	close(outfd);
	close(errfd);
	close(kmsgfd);

	if (aborting)
		return -1;

	return killed;
}
/*
 * Child-side half of test execution: never returns.
 *
 * Redirects stdout/stderr to the given pipe ends, moves the process into
 * its own process group, builds "<test_root>/<binary>" and, when the
 * entry lists subtests, the arguments "--run-subtest a,b,c", then execs
 * the test. If the exec fails the process exits with IGT_EXIT_INVALID.
 */
static void __attribute__((noreturn))
execute_test_process(int outfd, int errfd,
		     struct settings *settings,
		     struct job_list_entry *entry)
{
	char *argv[4] = { NULL };
	size_t rootlen;

	dup2(outfd, STDOUT_FILENO);
	dup2(errfd, STDERR_FILENO);

	setpgid(0, 0);

	/* root + '/' + binary + NUL */
	rootlen = strlen(settings->test_root);
	argv[0] = malloc(rootlen + strlen(entry->binary) + 2);
	memcpy(argv[0], settings->test_root, rootlen);
	argv[0][rootlen] = '/';
	strcpy(argv[0] + rootlen + 1, entry->binary);

	if (entry->subtest_count) {
		size_t joined_size = 0;
		size_t i;
		char *cursor;

		argv[1] = strdup("--run-subtest");

		/* Each name needs its length plus one byte: a comma or the final NUL. */
		for (i = 0; i < entry->subtest_count; i++)
			joined_size += strlen(entry->subtests[i]) + 1;

		argv[2] = malloc(joined_size);
		cursor = argv[2];

		for (i = 0; i < entry->subtest_count; i++) {
			const char *sub = entry->subtests[i];
			size_t sublen = strlen(sub);

			if (i > 0)
				*cursor++ = ',';

			memcpy(cursor, sub, sublen);
			cursor += sublen;
		}
		*cursor = '\0';
	}

	execv(argv[0], argv);
	fprintf(stderr, "Cannot execute %s\n", argv[0]);
	exit(IGT_EXIT_INVALID);
}
/*
 * Number of decimal digits needed to print num; 0 counts as one digit.
 */
static int digits(size_t num)
{
	int count = 1;

	while (num >= 10) {
		num /= 10;
		count++;
	}

	return count;
}
/*
 * Print the remaining overall time as "(NNNs left) ", padded to the
 * width of the configured overall timeout. Prints nothing when no
 * overall timeout is configured.
 */
static void print_time_left(struct execute_state *state,
			    struct settings *settings)
{
	if (settings->overall_timeout > 0) {
		const int width = digits(settings->overall_timeout);

		printf("(%*.0fs left) ", width, state->time_left);
	}
}
/*
 * Build a printable name for a job list entry: the binary name, followed
 * by " (sub1, sub2, ...)" when the entry lists subtests.
 *
 * Returns a heap-allocated string that the caller must free().
 */
static char *entry_display_name(struct job_list_entry *entry)
{
	size_t size = strlen(entry->binary) + 1;
	size_t i;
	char *name;
	char *tail;

	if (entry->subtest_count > 0) {
		size += 3; /* strlen(" (") + strlen(")") */
		for (i = 0; i < entry->subtest_count; i++) {
			/* Every subtest after the first is preceded by ", ". */
			size += strlen(entry->subtests[i]) + (i > 0 ? 2 : 0);
		}
	}

	name = malloc(size);
	tail = name + sprintf(name, "%s", entry->binary);

	if (entry->subtest_count > 0) {
		tail += sprintf(tail, " (");
		for (i = 0; i < entry->subtest_count; i++) {
			tail += sprintf(tail, "%s%s",
					i > 0 ? ", " : "",
					entry->subtests[i]);
		}
		sprintf(tail, ")");
	}

	return name;
}
/*
 * Run the job list entry at index state->next.
 *
 * Creates (if needed) and opens the per-test result directory named
 * after the index, opens the output files, forks the test with its
 * stdout/stderr connected to pipes, and hands the parent side over to
 * monitor_output().
 *
 * Returns:
 *  =0 - Success
 *  <0 - Failure executing
 *  >0 - Timeout happened, need to recreate from journal
 */
static int execute_next_entry(struct execute_state *state,
			      size_t total,
			      double *time_spent,
			      struct settings *settings,
			      struct job_list_entry *entry,
			      int testdirfd, int resdirfd,
			      int sigfd, sigset_t *sigmask)
{
	int dirfd;
	int outputs[_F_LAST];
	int kmsgfd;
	int outpipe[2] = { -1, -1 };
	int errpipe[2] = { -1, -1 };
	int outfd, errfd;
	char name[32];
	pid_t child;
	int result;
	size_t idx = state->next;

	snprintf(name, sizeof(name), "%zu", idx);
	mkdirat(resdirfd, name, 0777);
	if ((dirfd = openat(resdirfd, name, O_DIRECTORY | O_RDONLY | O_CLOEXEC)) < 0) {
		fprintf(stderr, "Error accessing individual test result directory\n");
		return -1;
	}

	if (!open_output_files(dirfd, outputs, true)) {
		fprintf(stderr, "Error opening output files\n");
		result = -1;
		goto out_dirfd;
	}

	if (settings->sync) {
		fsync(dirfd);
		fsync(resdirfd);
	}

	if (pipe(outpipe) || pipe(errpipe)) {
		fprintf(stderr, "Error creating pipes: %s\n", strerror(errno));
		result = -1;
		goto out_pipe;
	}

	if ((kmsgfd = open("/dev/kmsg", O_RDONLY | O_CLOEXEC)) < 0) {
		fprintf(stderr, "Warning: Cannot open /dev/kmsg\n");
	} else {
		/* TODO: Checking of abort conditions in pre-execute dmesg */
		lseek(kmsgfd, 0, SEEK_END);
	}

	if (settings->log_level >= LOG_LEVEL_NORMAL) {
		char *displayname;
		int width = digits(total);

		printf("[%0*zu/%0*zu] ", width, idx + 1, width, total);

		print_time_left(state, settings);

		displayname = entry_display_name(entry);
		printf("%s", displayname);
		free(displayname);

		printf("\n");
	}

	/*
	 * Flush outputs before forking so our (buffered) output won't
	 * end up in the test outputs.
	 */
	fflush(stdout);
	fflush(stderr);

	child = fork();
	if (child < 0) {
		fprintf(stderr, "Failed to fork: %s\n", strerror(errno));
		result = -1;
		goto out_kmsgfd;
	} else if (child == 0) {
		/* Child: keep only the write ends and become the test. */
		outfd = outpipe[1];
		errfd = errpipe[1];
		close(outpipe[0]);
		close(errpipe[0]);

		sigprocmask(SIG_UNBLOCK, sigmask, NULL);

		setenv("IGT_SENTINEL_ON_STDERR", "1", 1);

		execute_test_process(outfd, errfd, settings, entry);
		/* unreachable */
	}

	/* Parent: keep only the read ends. */
	outfd = outpipe[0];
	errfd = errpipe[0];
	close(outpipe[1]);
	close(errpipe[1]);
	outpipe[1] = errpipe[1] = -1;

	result = monitor_output(child, outfd, errfd, kmsgfd, sigfd,
				outputs, time_spent, settings);

	/*
	 * monitor_output() closes the pipe read ends and kmsgfd on its
	 * normal exit path but not on every early return, so they are
	 * closed again below.
	 */
out_kmsgfd:
	close(kmsgfd);
out_pipe:
	/* The output files are closed exactly once, here. */
	close_outputs(outputs);
	close(outpipe[0]);
	close(outpipe[1]);
	close(errpipe[0]);
	close(errpipe[1]);
out_dirfd:
	close(dirfd);

	return result;
}
/*
 * Delete 'name' relative to dirfd. A file that does not exist counts as
 * success.
 *
 * Returns 0 on success and 1 on any failure other than ENOENT.
 */
static int remove_file(int dirfd, const char *name)
{
	if (unlinkat(dirfd, name, 0) == 0)
		return 0;

	return errno != ENOENT;
}
static bool clear_test_result_directory(int dirfd)
{
int i;
for (i = 0; i < _F_LAST; i++) {
if (remove_file(dirfd, filenames[i])) {
fprintf(stderr, "Error deleting %s from test result directory: %s\n",
filenames[i],
strerror(errno));
return false;
}
}
return true;
}
/*
 * Remove the results of a previous run from the directory at 'path'.
 *
 * Deletes the top-level metadata files, then walks the numbered
 * per-test directories 0, 1, 2, ... until one cannot be opened, emptying
 * and removing each. A per-test directory that still contains other
 * files only produces a warning.
 *
 * Returns true on success, including when 'path' does not exist at all;
 * false when something could not be deleted.
 */
static bool clear_old_results(char *path)
{
	static const char *const toplevel[] = {
		"uname.txt",
		"starttime.txt",
		"endtime.txt",
		"aborted.txt",
	};
	size_t idx;
	int dirfd;

	dirfd = open(path, O_DIRECTORY | O_RDONLY);
	if (dirfd < 0) {
		if (errno == ENOENT) {
			/* Successfully cleared if it doesn't even exist */
			return true;
		}

		fprintf(stderr, "Error clearing old results: %s\n", strerror(errno));
		return false;
	}

	for (idx = 0; idx < sizeof(toplevel) / sizeof(toplevel[0]); idx++) {
		if (!remove_file(dirfd, toplevel[idx]))
			continue;

		close(dirfd);
		fprintf(stderr, "Error clearing old results: %s\n", strerror(errno));
		return false;
	}

	for (idx = 0; ; idx++) {
		char name[32];
		int resdirfd;

		snprintf(name, sizeof(name), "%zd", idx);
		resdirfd = openat(dirfd, name, O_DIRECTORY | O_RDONLY);
		if (resdirfd < 0)
			break;

		if (!clear_test_result_directory(resdirfd)) {
			close(resdirfd);
			close(dirfd);
			return false;
		}
		close(resdirfd);

		if (unlinkat(dirfd, name, AT_REMOVEDIR)) {
			fprintf(stderr,
				"Warning: Result directory %s contains extra files\n",
				name);
		}
	}

	close(dirfd);

	return true;
}
/*
 * Current wall-clock time from gettimeofday() as seconds with a
 * fractional microsecond part, or 0.0 if the call fails.
 */
static double timeofday_double(void)
{
	struct timeval now;

	if (gettimeofday(&now, NULL) != 0)
		return 0.0;

	return now.tv_sec + now.tv_usec / 1000000.0;
}
/*
 * Initialise the remaining overall time from the settings. A
 * non-positive overall timeout is stored as -1, which
 * reduce_time_left() treats as "no limit".
 */
static void init_time_left(struct execute_state *state,
			   struct settings *settings)
{
	if (settings->overall_timeout > 0) {
		state->time_left = settings->overall_timeout;
		return;
	}

	state->time_left = -1;
}
/*
 * Rebuild the execution state from an existing results directory so an
 * interrupted run can continue.
 *
 * The settings and job list are discarded and re-read from dirfd. The
 * highest-numbered result directory that exists identifies the last test
 * that was started; its journal is used to exclude already-started
 * subtests, and state->next is set to that test or to the one after it.
 *
 * dirfd is closed before returning, on success and on failure.
 * Returns false if the settings or job list cannot be read.
 */
bool initialize_execute_state_from_resume(int dirfd,
					  struct execute_state *state,
					  struct settings *settings,
					  struct job_list *list)
{
	struct job_list_entry *entry;
	int resdirfd, fd, i;

	free_settings(settings);
	free_job_list(list);
	memset(state, 0, sizeof(*state));
	state->resuming = true;

	if (!read_settings_from_dir(settings, dirfd) ||
	    !read_job_list(list, dirfd)) {
		close(dirfd);
		return false;
	}

	init_time_left(state, settings);

	/* Search downwards for the newest per-test result directory. */
	for (i = list->size; i >= 0; i--) {
		char name[32];

		snprintf(name, sizeof(name), "%d", i);
		resdirfd = openat(dirfd, name, O_DIRECTORY | O_RDONLY);
		if (resdirfd >= 0)
			break;
	}

	/* If i < 0, nothing has been executed yet and state is fine as is. */
	if (i >= 0) {
		entry = &list->entries[i];
		state->next = i;

		fd = openat(resdirfd, filenames[_F_JOURNAL], O_RDONLY);
		if (fd >= 0) {
			/*
			 * Move on to the next test when nothing could be
			 * pruned (the test does not have subtests, or
			 * incompleted before the first subtest began, so
			 * it is not suitable to re-run), or when the
			 * journal shows this test is fully completed.
			 */
			if (!prune_from_journal(entry, fd) ||
			    entry->binary[0] == '\0')
				state->next = i + 1;

			close(fd);
		}
	}

	close(resdirfd);
	close(dirfd);

	return true;
}
/*
 * Prepare the execution state for a fresh run.
 *
 * Validates the settings, serializes the settings and the job list,
 * clears old results when overwriting is requested, and initialises the
 * remaining time and the dry-run flag.
 *
 * Returns false as soon as one of those steps fails.
 */
bool initialize_execute_state(struct execute_state *state,
			      struct settings *settings,
			      struct job_list *job_list)
{
	memset(state, 0, sizeof(*state));

	if (!validate_settings(settings))
		return false;

	if (!serialize_settings(settings))
		return false;

	if (!serialize_job_list(job_list, settings))
		return false;

	if (settings->overwrite) {
		if (!clear_old_results(settings->results_path))
			return false;
	}

	init_time_left(state, settings);
	state->dry = settings->dry_run;

	return true;
}
/*
 * Subtract the time one test took from the remaining overall time,
 * stopping at 0. A negative time_left means no overall limit and is left
 * untouched.
 */
static void reduce_time_left(struct settings *settings,
			     struct execute_state *state,
			     double time_spent)
{
	if (state->time_left < 0)
		return;

	state->time_left = time_spent > state->time_left ?
		0.0 : state->time_left - time_spent;
}
/*
 * True once the remaining overall time is exactly 0.0, the value
 * reduce_time_left() stores when the budget is used up.
 */
static bool overall_timeout_exceeded(struct execute_state *state)
{
	const double remaining = state->time_left;

	return remaining == 0.0;
}
/*
 * Record why the run was aborted in "aborted.txt" inside the results
 * directory, together with the tests executed before and after the
 * abort point.
 *
 * The file is created exclusively. Failure to open is ignored: there is
 * probably already an abort file (if this is a resume).
 */
static void write_abort_file(int resdirfd,
			     const char *reason,
			     const char *testbefore,
			     const char *testafter)
{
	int abortfd = openat(resdirfd, "aborted.txt", O_CREAT | O_WRONLY | O_EXCL, 0666);

	if (abortfd < 0)
		return;

	dprintf(abortfd, "Aborting.\n");
	dprintf(abortfd, "Previous test: %s\n", testbefore);
	dprintf(abortfd, "Next test: %s\n\n", testafter);
	write(abortfd, reason, strlen(reason));
	close(abortfd);
}
/*
 * Write "-1000" to /proc/self/oom_score_adj for this process. Failures
 * only produce a warning on stderr.
 */
static void oom_immortal(void)
{
	const char never_kill[] = "-1000";
	int fd = open("/proc/self/oom_score_adj", O_WRONLY);

	if (fd < 0) {
		fprintf(stderr, "Warning: Cannot adjust oom score.\n");
		return;
	}

	/* The whole array, terminating NUL included, is written. */
	if (write(fd, never_kill, sizeof(never_kill)) != sizeof(never_kill)) {
		fprintf(stderr, "Warning: Adjusting oom score failed.\n");
	}

	close(fd);
}
/*
 * Check, without blocking, whether a signal is pending on sigfd that
 * should stop the runner.
 *
 * Returns true when polling fails or when a signal other than SIGCHLD
 * was read. Returns false when nothing is pending, when reading the
 * signal fails (the caller may retry later), or when the signal was a
 * stray SIGCHLD.
 */
static bool should_die_because_signal(int sigfd)
{
	struct pollfd sigpoll = { .fd = sigfd, .events = POLLIN | POLLRDBAND };
	struct signalfd_siginfo siginfo;
	int ret;

	ret = poll(&sigpoll, 1, 0);
	if (ret == 0)
		return false;

	if (ret == -1) {
		fprintf(stderr, "Poll on signalfd failed with %s\n", strerror(errno));
		return true; /* something is wrong, let's die */
	}

	ret = read(sigfd, &siginfo, sizeof(siginfo));
	if (ret == -1) {
		fprintf(stderr, "Error reading from signalfd: %s\n", strerror(errno));
		return false; /* we may want to retry later */
	}

	if (siginfo.ssi_signo != SIGCHLD) {
		fprintf(stderr, "Runner is being killed by %s\n",
			strsignal(siginfo.ssi_signo));
		return true;
	}

	fprintf(stderr, "Runner got stray SIGCHLD while not executing any tests.\n");
	return false;
}
/*
 * Run every remaining entry of the job list, starting at state->next.
 *
 * Sets up the results directory metadata (uname.txt, starttime.txt),
 * blocks the handled signals and routes them through a signalfd, opens
 * the watchdogs, and then executes the entries one by one. After each
 * test the overall timeout and the abort conditions are checked. When a
 * test timed out, the state is rebuilt from the journal and execution
 * restarts through a recursive call.
 *
 * Returns true when the run finished, including a dry run or reaching
 * the overall timeout. Returns false on setup failure, on a failure
 * executing a test, on an abort condition, or when a termination signal
 * was received.
 */
bool execute(struct execute_state *state,
	     struct settings *settings,
	     struct job_list *job_list)
{
	struct utsname unamebuf;
	int resdirfd, testdirfd, unamefd, timefd;
	sigset_t sigmask;
	int sigfd;
	double time_spent = 0.0;
	bool status = true;

	if (state->dry) {
		printf("Dry run, not executing. Invoke igt_resume if you want to execute.\n");
		return true;
	}

	if ((resdirfd = open(settings->results_path, O_DIRECTORY | O_RDONLY)) < 0) {
		/* Initialize state should have done this */
		fprintf(stderr, "Error: Failure opening results path %s\n",
			settings->results_path);
		return false;
	}

	if ((testdirfd = open(settings->test_root, O_DIRECTORY | O_RDONLY)) < 0) {
		fprintf(stderr, "Error: Failure opening test root %s\n",
			settings->test_root);
		close(resdirfd);
		return false;
	}

	/* TODO: On resume, don't rewrite, verify that content matches current instead */
	if ((unamefd = openat(resdirfd, "uname.txt", O_CREAT | O_WRONLY | O_TRUNC, 0666)) < 0) {
		fprintf(stderr, "Error: Failure opening uname.txt: %s\n",
			strerror(errno));
		close(testdirfd);
		close(resdirfd);
		return false;
	}

	if ((timefd = openat(resdirfd, "starttime.txt", O_CREAT | O_WRONLY | O_EXCL, 0666)) >= 0) {
		/*
		 * Ignore failure to open. If this is a resume, we
		 * don't want to overwrite. For other errors, we
		 * ignore the start time.
		 */
		dprintf(timefd, "%f\n", timeofday_double());
		close(timefd);
	}

	oom_immortal();

	sigemptyset(&sigmask);
	sigaddset(&sigmask, SIGCHLD);
	sigaddset(&sigmask, SIGINT);
	sigaddset(&sigmask, SIGTERM);
	sigaddset(&sigmask, SIGQUIT);
	sigaddset(&sigmask, SIGHUP);
	sigfd = signalfd(-1, &sigmask, SFD_CLOEXEC);
	sigprocmask(SIG_BLOCK, &sigmask, NULL);

	if (sigfd < 0) {
		/* TODO: Handle better */
		fprintf(stderr, "Cannot mask signals\n");
		/* uname.txt is only closed further down; don't leak it here. */
		close(unamefd);
		status = false;
		goto end;
	}

	init_watchdogs(settings);

	if (!uname(&unamebuf)) {
		dprintf(unamefd, "%s %s %s %s %s\n",
			unamebuf.sysname,
			unamebuf.nodename,
			unamebuf.release,
			unamebuf.version,
			unamebuf.machine);
	} else {
		dprintf(unamefd, "uname() failed\n");
	}
	close(unamefd);

	/* Check if we're already in abort-state at bootup */
	if (!state->resuming) {
		char *reason;

		if ((reason = need_to_abort(settings)) != NULL) {
			/* With nothing left to run there is no next entry to name. */
			char *nexttest = (state->next < job_list->size ?
					  entry_display_name(&job_list->entries[state->next]) :
					  strdup("nothing"));

			write_abort_file(resdirfd, reason, "nothing", nexttest);
			free(reason);
			free(nexttest);

			status = false;

			goto end;
		}
	}

	for (; state->next < job_list->size;
	     state->next++) {
		char *reason;
		int result;

		if (should_die_because_signal(sigfd)) {
			status = false;
			goto end;
		}

		result = execute_next_entry(state,
					    job_list->size,
					    &time_spent,
					    settings,
					    &job_list->entries[state->next],
					    testdirfd, resdirfd,
					    sigfd, &sigmask);

		if (result < 0) {
			status = false;
			break;
		}

		reduce_time_left(settings, state, time_spent);

		if (overall_timeout_exceeded(state)) {
			if (settings->log_level >= LOG_LEVEL_NORMAL) {
				printf("Overall timeout time exceeded, stopping.\n");
			}

			break;
		}

		if ((reason = need_to_abort(settings)) != NULL) {
			char *prev = entry_display_name(&job_list->entries[state->next]);
			char *next = (state->next + 1 < job_list->size ?
				      entry_display_name(&job_list->entries[state->next + 1]) :
				      strdup("nothing"));

			write_abort_file(resdirfd, reason, prev, next);
			free(prev);
			free(next);
			free(reason);
			status = false;
			break;
		}

		if (result > 0) {
			/*
			 * The test timed out. Tear everything down, rebuild
			 * the state from the journal and start over from
			 * where the run stopped.
			 */
			double time_left = state->time_left;

			close_watchdogs(settings);
			sigprocmask(SIG_UNBLOCK, &sigmask, NULL);
			/* make sure that we do not leave any signals unhandled */
			if (should_die_because_signal(sigfd)) {
				status = false;
				goto end_post_signal_restore;
			}
			close(sigfd);
			close(testdirfd);

			/*
			 * The resume helper closes resdirfd whether it
			 * succeeds or fails, so nothing is left to clean up
			 * here if it fails.
			 */
			if (!initialize_execute_state_from_resume(resdirfd, state, settings, job_list))
				return false;

			state->time_left = time_left;
			return execute(state, settings, job_list);
		}
	}

	if ((timefd = openat(resdirfd, "endtime.txt", O_CREAT | O_WRONLY | O_EXCL, 0666)) >= 0) {
		dprintf(timefd, "%f\n", timeofday_double());
		close(timefd);
	}

end:
	close_watchdogs(settings);
	sigprocmask(SIG_UNBLOCK, &sigmask, NULL);
	/* make sure that we do not leave any signals unhandled */
	if (should_die_because_signal(sigfd))
		status = false;
end_post_signal_restore:
	close(sigfd);
	close(testdirfd);
	close(resdirfd);
	return status;
}