blob: 487b6ec398e643d4bbad355a976e5cd57090cf1b [file] [log] [blame]
/*
* Copyright (C) 2014-2015 Red Hat, Inc.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*/
#define _XOPEN_SOURCE 500 /* pthread */
#define _ISOC99_SOURCE
#define _REENTRANT
#include "tool.h"
#include "daemon-io.h"
#include "daemon-server.h"
#include "daemon-log.h"
#include "lvm-version.h"
#include "lvmetad-client.h"
#include "lvmlockd-client.h"
/* #include <assert.h> */
#include <errno.h>
#include <pthread.h>
#include <stddef.h>
#include <poll.h>
#include <signal.h>
#include <getopt.h>
#include <syslog.h>
#include <dirent.h>
#include <time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <sys/un.h>
#define EXTERN
#include "lvmlockd-internal.h"
/*
* Basic operation of lvmlockd
*
* lvmlockd main process runs main_loop() which uses poll().
* poll listens for new connections from lvm commands and for
* messages from existing connected lvm commands.
*
* lvm command starts and connects to lvmlockd.
*
* lvmlockd receives a connection request from command and adds a
* 'struct client' to keep track of the connection to the command.
* The client's fd is added to the set of fd's in poll().
*
* lvm command sends a lock request to lvmlockd. The lock request
* can be for the global lock, a vg lock, or an lv lock.
*
* lvmlockd main_loop/poll sees a message from an existing client.
* It sets client.recv = 1, then wakes up client_thread_main.
*
* client_thread_main iterates through client structs (cl), looking
* for any that need processing, finds the one with cl->recv set,
* and calls client_recv_action(cl).
*
* client_recv_action(cl) reads the message/request from the client,
* allocates a new 'struct action' (act) to represent the request,
* sets the act with what is found in the request, then looks at
* the specific operation in act->op (LD_OP_FOO) to decide what to
* do with the action:
*
* . If the action is to start a lockspace, create a new thread
* to manage that lockspace: add_lockspace(act).
*
* . If the action is a lock request, pass the act to the thread
* that is managing that lockspace: add_lock_action(act).
*
* . Other misc actions are passed to the worker_thread:
* add_work_action(act).
*
* Once the client_thread has passed the action off to another
* thread to process, it goes back to waiting for more client
* handling work to do.
*
* The thread that was given the action by the client_thread
* now processes that action according to the operation, act->op.
* This is either a lockspace_thread (for lock ops or ops that
* add/rem a lockspace), or the worker_thread. See below for
* how these ops are processed by these threads. When the
* given thread is done processing the action, the result is
* set in act->result, and the act struct for the completed action
* is passed back to the client_thread (client_results list).
*
* The client_thread takes completed actions (from client_results
* list), and sends the result back to the client that sent the
* request represented by the action. The act struct is then freed.
*
* This completes the cycle of work between lvm commands (clients)
* and lvmlockd. In summary:
*
* - main process polls for new client connections and new requests
* from lvm commands
* - client_thread reads requests from clients
* - client_thread creates an action struct for each request
* - client_thread passes the act to another thread for processing
* - other threads pass completed act structs back to client_thread
* - client_thread sends the act result back to the client and frees the act
*
*
* Lockspace threads:
* Each lockd VG has its own lockspace that contains locks for that VG.
* Each 'struct lockspace' is managed by a separate lockspace_thread.
* When the lockspace_thread is first created, the first thing it does
* is join the lockspace in the lock manager. This can take a long time.
* If the join fails, the thread exits. After the join, the thread
* enters a loop waiting for lock actions to perform in the lockspace.
*
* The request to remove/leave a lockspace causes a flag to be set in
* the lockspace struct. When the lockspace_thread sees this flag
* set, it leaves the lockspace, and exits.
*
* When the client_thread passes a new action to a lockspace_thread,
* i.e. a new lock request, the lockspace_thread identifies which resource
* is being locked (GL, VG, LV), and gets the 'struct resource' (r) for it.
* r->type will be LD_RT_GL, LD_RT_VG, or LD_RT_LV. r->name is the
* resource name, and is fixed for GL and VG resources, but is based on
* the LV name for LV resources. The act is added to the resource's
* list of actions: r->actions, i.e. outstanding lock requests on the
* resource.
*
* The lockspace thread then iterates through each resource in the
* lockspace, processing any outstanding actions on each: res_process(ls, r).
*
* res_process() compares the outstanding actions/requests in r->actions
* against any existing locks on the resource in r->locks. If the
* action is blocked by existing locks, it's left on r->actions. If not,
* the action/request is passed to the lock manager. If the result from
* the lock manager is success, a new 'struct lock' is created for the
* action and saved on r->locks. The result is set in act->result and
* the act is passed back to the client_thread to be returned to the client.
*/
static const char *lvmlockd_protocol = "lvmlockd";
static const int lvmlockd_protocol_version = 1;
static int daemon_quit;
static int adopt_opt;
static daemon_handle lvmetad_handle;
static pthread_mutex_t lvmetad_mutex;
static int lvmetad_connected;
/*
* We use a separate socket for dumping daemon info.
* This will not interfere with normal operations, and allows
* free-form debug data to be dumped instead of the libdaemon
* protocol that wants all data in the cft format.
* 1MB should fit all the info we need to dump.
*/
#define DUMP_SOCKET_NAME "lvmlockd-dump.sock"
#define DUMP_BUF_SIZE (1024 * 1024)
static char dump_buf[DUMP_BUF_SIZE];
static struct sockaddr_un dump_addr;
static socklen_t dump_addrlen;
/*
* Main program polls client connections, adds new clients,
* adds work for client thread.
*
* pollfd_mutex is used for adding vs removing entries,
* and for resume vs realloc.
*/
#define POLL_FD_UNUSED -1 /* slot is free */
#define POLL_FD_IGNORE -2 /* slot is used but ignore in poll */
#define ADD_POLL_SIZE 16 /* increment slots by this amount */
static pthread_mutex_t pollfd_mutex;
static struct pollfd *pollfd;
static int pollfd_size;
static int pollfd_maxi;
static int listen_pi;
static int listen_fd;
static int restart_pi;
static int restart_fds[2];
/*
* Each lockspace has its own thread to do locking.
* The lockspace thread makes synchronous lock requests to dlm/sanlock.
* Every vg with a lockd type, i.e. "dlm", "sanlock", should be on this list.
*/
static pthread_mutex_t lockspaces_mutex;
static struct list_head lockspaces;
/*
* Client thread reads client requests and writes client results.
*/
static pthread_t client_thread;
static pthread_mutex_t client_mutex;
static pthread_cond_t client_cond;
static struct list_head client_list; /* connected clients */
static struct list_head client_results; /* actions to send back to clients */
static uint32_t client_ids; /* 0 and INTERNAL_CLIENT_ID are skipped */
static int client_stop; /* stop the thread */
static int client_work; /* a client on client_list has work to do */
#define INTERNAL_CLIENT_ID 0xFFFFFFFF /* special client_id for internal actions */
static struct list_head adopt_results; /* special start actions from adopt_locks() */
/*
* Worker thread performs misc non-locking actions, e.g. init/free.
*/
static pthread_t worker_thread;
static pthread_mutex_t worker_mutex;
static pthread_cond_t worker_cond;
static struct list_head worker_list; /* actions for worker_thread */
static int worker_stop; /* stop the thread */
static int worker_wake; /* wake the thread without adding work */
/*
* The content of every log_foo() statement is saved in the
* circular buffer, which can be dumped to a client and printed.
*/
#define LOG_LINE_SIZE 256
#define LOG_DUMP_SIZE DUMP_BUF_SIZE
#define LOG_SYSLOG_PRIO LOG_WARNING
static char log_dump[LOG_DUMP_SIZE];
static unsigned int log_point;
static unsigned int log_wrap;
static pthread_mutex_t log_mutex;
static int syslog_priority = LOG_SYSLOG_PRIO;
/*
* Structure pools to avoid repeated malloc/free.
*/
#define MAX_UNUSED_ACTION 64
#define MAX_UNUSED_CLIENT 64
#define MAX_UNUSED_RESOURCE 64
#define MAX_UNUSED_LOCK 64
static pthread_mutex_t unused_struct_mutex;
static struct list_head unused_action;
static struct list_head unused_client;
static struct list_head unused_resource;
static struct list_head unused_lock;
static int unused_action_count;
static int unused_client_count;
static int unused_resource_count;
static int unused_lock_count;
static int resource_lm_data_size; /* max size of lm_data from sanlock|dlm */
static int alloc_new_structs; /* used for initializing in setup_structs */
#define DO_STOP 1
#define NO_STOP 0
#define DO_FREE 1
#define NO_FREE 0
#define DO_FORCE 1
#define NO_FORCE 0
static int add_lock_action(struct action *act);
static int str_to_lm(const char *str);
static int setup_dump_socket(void);
static void send_dump_buf(int fd, int dump_len);
static int dump_info(int *dump_len);
static int dump_log(int *dump_len);
/*
 * Map a syslog priority name (as given on the command line / config)
 * to its LOG_* constant.  Unknown names fall back to LOG_WARNING.
 */
static int _syslog_name_to_num(const char *name)
{
	static const struct {
		const char *name;
		int num;
	} pri_names[] = {
		{ "emerg",   LOG_EMERG },
		{ "alert",   LOG_ALERT },
		{ "crit",    LOG_CRIT },
		{ "err",     LOG_ERR },
		{ "error",   LOG_ERR },
		{ "warning", LOG_WARNING },
		{ "warn",    LOG_WARNING },
		{ "notice",  LOG_NOTICE },
		{ "info",    LOG_INFO },
		{ "debug",   LOG_DEBUG },
	};
	unsigned int i;

	for (i = 0; i < sizeof(pri_names) / sizeof(pri_names[0]); i++) {
		if (!strcmp(name, pri_names[i].name))
			return pri_names[i].num;
	}
	return LOG_WARNING;
}
/*
 * Map a LOG_* syslog priority constant back to its textual name.
 * Returns "unknown" for values outside the standard set.
 */
static const char *_syslog_num_to_name(int num)
{
	if (num == LOG_EMERG)
		return "emerg";
	if (num == LOG_ALERT)
		return "alert";
	if (num == LOG_CRIT)
		return "crit";
	if (num == LOG_ERR)
		return "err";
	if (num == LOG_WARNING)
		return "warning";
	if (num == LOG_NOTICE)
		return "notice";
	if (num == LOG_INFO)
		return "info";
	if (num == LOG_DEBUG)
		return "debug";
	return "unknown";
}
/*
 * Current monotonic time in whole seconds (nanoseconds discarded).
 * Returns 0 if clock_gettime() fails; the original ignored the return
 * value and would read an uninitialized timespec on failure.
 */
static uint64_t monotime(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
		return 0;
	return ts.tv_sec;
}
/*
 * Append len bytes of line to the circular buffer log_buf (whose size
 * is LOG_DUMP_SIZE).
 *
 * point is the next write offset in log_buf; wrap becomes 1 once the
 * buffer has wrapped past the end at least once.  Both are read and
 * updated through the pointers so the caller's cursor state persists
 * across calls.  The caller holds log_mutex (see log_level/dump_log).
 */
static void log_save_line(int len, char *line,
			  char *log_buf, unsigned int *point, unsigned int *wrap)
{
	unsigned int p = *point;
	unsigned int w = *wrap;
	int i;

	/*
	 * Fast path: the whole line fits before the end of the buffer,
	 * so a single memcpy suffices.  (Since len < LOG_DUMP_SIZE - p,
	 * the wrap check below is defensive and cannot actually fire.)
	 */
	if (len < LOG_DUMP_SIZE - p) {
		memcpy(log_buf + p, line, len);
		p += len;

		if (p == LOG_DUMP_SIZE) {
			p = 0;
			w = 1;
		}
		goto out;
	}

	/* Slow path: copy byte by byte, wrapping to offset 0 at the end. */
	for (i = 0; i < len; i++) {
		log_buf[p++] = line[i];

		if (p == LOG_DUMP_SIZE) {
			p = 0;
			w = 1;
		}
	}
out:
	*point = p;
	*wrap = w;
}
/*
 * Format one log line as "<epoch seconds> <message>\n", save it in the
 * circular log_dump buffer, and also send it to syslog (when level is
 * at or above syslog_priority) and to stderr (when daemon_debug is set).
 *
 * Fix: the original did not handle negative returns from snprintf or
 * vsnprintf, which would make pos negative and write before the start
 * of the line buffer.
 */
void log_level(int level, const char *fmt, ...)
{
	char line[LOG_LINE_SIZE];
	va_list ap;
	int len = LOG_LINE_SIZE - 1;
	int ret, pos = 0;

	memset(line, 0, sizeof(line));

	ret = snprintf(line, len, "%llu ", (unsigned long long)time(NULL));
	if (ret < 0)
		ret = 0;	/* formatting failed: start message at offset 0 */
	else if (ret >= len)
		ret = len - 1;	/* truncated (not expected for a timestamp) */
	pos += ret;

	va_start(ap, fmt);
	ret = vsnprintf(line + pos, len - pos, fmt, ap);
	va_end(ap);

	if (ret >= 0) {
		if (ret >= len - pos)
			pos = len - 1;	/* message truncated to fit */
		else
			pos += ret;
	}
	/* ret < 0: formatting failed, keep pos where it is */

	line[pos++] = '\n';
	line[pos++] = '\0';

	pthread_mutex_lock(&log_mutex);
	log_save_line(pos - 1, line, log_dump, &log_point, &log_wrap);
	pthread_mutex_unlock(&log_mutex);

	if (level <= syslog_priority)
		syslog(level, "%s", line);

	if (daemon_debug)
		fprintf(stderr, "%s", line);
}
/*
 * Copy the circular log buffer into dump_buf in chronological order
 * and report the number of valid bytes via dump_len.  Always returns 0.
 */
static int dump_log(int *dump_len)
{
	int len = 0;

	pthread_mutex_lock(&log_mutex);

	if (log_wrap) {
		/* Oldest data starts at log_point; copy tail then head. */
		int tail = LOG_DUMP_SIZE - log_point;

		memcpy(dump_buf, log_dump + log_point, tail);
		if (log_point)
			memcpy(dump_buf + tail, log_dump, log_point);
		len = LOG_DUMP_SIZE;
	} else if (log_point) {
		/* No wrap yet: bytes 0..log_point-1 are valid (drop final byte). */
		memcpy(dump_buf, log_dump, log_point - 1);
		len = log_point - 1;
	}

	pthread_mutex_unlock(&log_mutex);

	*dump_len = len;
	return 0;
}
/*
 * Allocate and initialize a zeroed lockspace struct with empty action
 * and resource lists and initialized mutex/cond.  Returns NULL on OOM.
 * (Uses calloc instead of malloc+memset.)
 */
struct lockspace *alloc_lockspace(void)
{
	struct lockspace *ls;

	if (!(ls = calloc(1, sizeof(struct lockspace)))) {
		log_error("out of memory for lockspace");
		return NULL;
	}

	INIT_LIST_HEAD(&ls->actions);
	INIT_LIST_HEAD(&ls->resources);
	pthread_mutex_init(&ls->mutex, NULL);
	pthread_cond_init(&ls->cond, NULL);
	return ls;
}
/*
 * Get a zeroed action struct, recycling one from the unused pool when
 * possible.  alloc_new_structs forces malloc (used while the pool is
 * being populated by setup_structs).  Returns NULL on OOM.
 */
static struct action *alloc_action(void)
{
	struct action *act = NULL;

	pthread_mutex_lock(&unused_struct_mutex);
	if (unused_action_count && !alloc_new_structs) {
		act = list_first_entry(&unused_action, struct action, list);
		list_del(&act->list);
		unused_action_count--;
	} else {
		act = malloc(sizeof(struct action));
	}
	pthread_mutex_unlock(&unused_struct_mutex);

	if (!act) {
		log_error("out of memory for action");
		return NULL;
	}

	memset(act, 0, sizeof(struct action));
	return act;
}
/*
 * Get a zeroed client struct, recycling one from the unused pool when
 * possible.  alloc_new_structs forces malloc (see setup_structs).
 * Returns NULL on OOM.
 */
static struct client *alloc_client(void)
{
	struct client *cl = NULL;

	pthread_mutex_lock(&unused_struct_mutex);
	if (unused_client_count && !alloc_new_structs) {
		cl = list_first_entry(&unused_client, struct client, list);
		list_del(&cl->list);
		unused_client_count--;
	} else {
		cl = malloc(sizeof(struct client));
	}
	pthread_mutex_unlock(&unused_struct_mutex);

	if (!cl) {
		log_error("out of memory for client");
		return NULL;
	}

	memset(cl, 0, sizeof(struct client));
	return cl;
}
/*
 * Get a zeroed resource struct (with trailing lm_data area of
 * resource_lm_data_size bytes), recycling from the unused pool when
 * possible.  Lock and action lists are initialized empty.
 * Returns NULL on OOM.
 */
static struct resource *alloc_resource(void)
{
	struct resource *r = NULL;

	pthread_mutex_lock(&unused_struct_mutex);
	if (unused_resource_count && !alloc_new_structs) {
		r = list_first_entry(&unused_resource, struct resource, list);
		list_del(&r->list);
		unused_resource_count--;
	} else {
		r = malloc(sizeof(struct resource) + resource_lm_data_size);
	}
	pthread_mutex_unlock(&unused_struct_mutex);

	if (!r) {
		log_error("out of memory for resource");
		return NULL;
	}

	memset(r, 0, sizeof(struct resource) + resource_lm_data_size);
	INIT_LIST_HEAD(&r->locks);
	INIT_LIST_HEAD(&r->actions);
	return r;
}
/*
 * Get a zeroed lock struct, recycling one from the unused pool when
 * possible.  alloc_new_structs forces malloc (see setup_structs).
 * Returns NULL on OOM.
 */
static struct lock *alloc_lock(void)
{
	struct lock *lk = NULL;

	pthread_mutex_lock(&unused_struct_mutex);
	if (unused_lock_count && !alloc_new_structs) {
		lk = list_first_entry(&unused_lock, struct lock, list);
		list_del(&lk->list);
		unused_lock_count--;
	} else {
		lk = malloc(sizeof(struct lock));
	}
	pthread_mutex_unlock(&unused_struct_mutex);

	if (!lk) {
		log_error("out of memory for lock");
		return NULL;
	}

	memset(lk, 0, sizeof(struct lock));
	return lk;
}
/*
 * Return an action struct to the unused pool, or free it outright
 * once the pool holds MAX_UNUSED_ACTION entries.
 */
static void free_action(struct action *act)
{
	pthread_mutex_lock(&unused_struct_mutex);
	if (unused_action_count < MAX_UNUSED_ACTION) {
		list_add_tail(&act->list, &unused_action);
		unused_action_count++;
	} else {
		free(act);
	}
	pthread_mutex_unlock(&unused_struct_mutex);
}
/*
 * Return a client struct to the unused pool, or free it outright
 * once the pool holds MAX_UNUSED_CLIENT entries.
 */
static void free_client(struct client *cl)
{
	pthread_mutex_lock(&unused_struct_mutex);
	if (unused_client_count < MAX_UNUSED_CLIENT) {
		list_add_tail(&cl->list, &unused_client);
		unused_client_count++;
	} else {
		free(cl);
	}
	pthread_mutex_unlock(&unused_struct_mutex);
}
/*
 * Return a resource struct to the unused pool, or free it outright
 * once the pool holds MAX_UNUSED_RESOURCE entries.
 */
static void free_resource(struct resource *r)
{
	pthread_mutex_lock(&unused_struct_mutex);
	if (unused_resource_count < MAX_UNUSED_RESOURCE) {
		list_add_tail(&r->list, &unused_resource);
		unused_resource_count++;
	} else {
		free(r);
	}
	pthread_mutex_unlock(&unused_struct_mutex);
}
/*
 * Return a lock struct to the unused pool, or free it outright
 * once the pool holds MAX_UNUSED_LOCK entries.
 */
static void free_lock(struct lock *lk)
{
	pthread_mutex_lock(&unused_struct_mutex);
	if (unused_lock_count < MAX_UNUSED_LOCK) {
		list_add_tail(&lk->list, &unused_lock);
		unused_lock_count++;
	} else {
		free(lk);
	}
	pthread_mutex_unlock(&unused_struct_mutex);
}
/*
 * Initialize the struct pools: compute the lm_data size a resource must
 * carry (the larger of sanlock's and dlm's), set up the pool lists, and
 * pre-populate each pool to half its maximum depth.
 * Returns 0 on success, -ENOMEM if any pre-allocation fails.
 */
static int setup_structs(void)
{
	int data_san = lm_data_size_sanlock();
	int data_dlm = lm_data_size_dlm();
	struct resource *r;
	struct client *cl;
	struct action *act;
	struct lock *lk;
	int count;

	resource_lm_data_size = (data_san > data_dlm) ? data_san : data_dlm;

	pthread_mutex_init(&unused_struct_mutex, NULL);
	INIT_LIST_HEAD(&unused_action);
	INIT_LIST_HEAD(&unused_client);
	INIT_LIST_HEAD(&unused_resource);
	INIT_LIST_HEAD(&unused_lock);

	/*
	 * With alloc_new_structs set, the alloc_ helpers always malloc
	 * rather than drawing from the pools, so each alloc/free pair
	 * below deposits one fresh struct on its unused list.
	 */
	alloc_new_structs = 1;

	for (count = 0; count < MAX_UNUSED_ACTION / 2; count++) {
		if (!(act = alloc_action()))
			goto fail;
		free_action(act);
	}
	for (count = 0; count < MAX_UNUSED_CLIENT / 2; count++) {
		if (!(cl = alloc_client()))
			goto fail;
		free_client(cl);
	}
	for (count = 0; count < MAX_UNUSED_RESOURCE / 2; count++) {
		if (!(r = alloc_resource()))
			goto fail;
		free_resource(r);
	}
	for (count = 0; count < MAX_UNUSED_LOCK / 2; count++) {
		if (!(lk = alloc_lock()))
			goto fail;
		free_lock(lk);
	}

	alloc_new_structs = 0;
	return 0;
fail:
	alloc_new_structs = 0;
	return -ENOMEM;
}
/*
 * Register fd in the poll array (events = POLLIN) and return its slot
 * index.  Reuses a free slot when one exists, otherwise grows the array
 * by ADD_POLL_SIZE entries.  Returns -ENOMEM if the grow fails.
 */
static int add_pollfd(int fd)
{
	struct pollfd *grown;
	int slot, grow_to;

	pthread_mutex_lock(&pollfd_mutex);

	/* First choice: reuse an unused slot. */
	for (slot = 0; slot < pollfd_size; slot++) {
		if (pollfd[slot].fd != POLL_FD_UNUSED)
			continue;
		pollfd[slot].fd = fd;
		pollfd[slot].events = POLLIN;
		pollfd[slot].revents = 0;

		if (slot > pollfd_maxi)
			pollfd_maxi = slot;

		pthread_mutex_unlock(&pollfd_mutex);
		return slot;
	}

	/* No free slot: grow the array. */
	grow_to = pollfd_size + ADD_POLL_SIZE;

	grown = realloc(pollfd, grow_to * sizeof(struct pollfd));
	if (!grown) {
		log_error("can't alloc new size %d for pollfd", grow_to);
		pthread_mutex_unlock(&pollfd_mutex);
		return -ENOMEM;
	}
	pollfd = grown;

	/* Mark all newly added slots unused. */
	for (slot = pollfd_size; slot < grow_to; slot++) {
		pollfd[slot].fd = POLL_FD_UNUSED;
		pollfd[slot].events = 0;
		pollfd[slot].revents = 0;
	}

	/* Take the first new slot for this fd. */
	slot = pollfd_size;
	pollfd[slot].fd = fd;
	pollfd[slot].events = POLLIN;
	pollfd[slot].revents = 0;
	pollfd_maxi = slot;
	pollfd_size = grow_to;

	pthread_mutex_unlock(&pollfd_mutex);
	return slot;
}
static void rem_pollfd(int pi)
{
if (pi < 0) {
log_error("rem_pollfd %d", pi);
return;
}
pthread_mutex_lock(&pollfd_mutex);
pollfd[pi].fd = POLL_FD_UNUSED;
pollfd[pi].events = 0;
pollfd[pi].revents = 0;
pthread_mutex_unlock(&pollfd_mutex);
}
/* Lock manager type (LD_LM_*) as a short display string. */
static const char *lm_str(int x)
{
	if (x == LD_LM_NONE)
		return "none";
	if (x == LD_LM_DLM)
		return "dlm";
	if (x == LD_LM_SANLOCK)
		return "sanlock";
	return "lm_unknown";
}
/* Resource type (LD_RT_*) as a short display string. */
static const char *rt_str(int x)
{
	if (x == LD_RT_GL)
		return "gl";
	if (x == LD_RT_VG)
		return "vg";
	if (x == LD_RT_LV)
		return "lv";
	return ".";
}
/* Operation code (LD_OP_*) as a display string for log messages. */
static const char *op_str(int x)
{
	static const struct {
		int op;
		const char *str;
	} op_names[] = {
		{ LD_OP_INIT,           "init" },
		{ LD_OP_FREE,           "free" },
		{ LD_OP_START,          "start" },
		{ LD_OP_STOP,           "stop" },
		{ LD_OP_LOCK,           "lock" },
		{ LD_OP_UPDATE,         "update" },
		{ LD_OP_CLOSE,          "close" },
		{ LD_OP_ENABLE,         "enable" },
		{ LD_OP_DISABLE,        "disable" },
		{ LD_OP_START_WAIT,     "start_wait" },
		{ LD_OP_STOP_ALL,       "stop_all" },
		{ LD_OP_RENAME_BEFORE,  "rename_before" },
		{ LD_OP_RENAME_FINAL,   "rename_final" },
		{ LD_OP_RUNNING_LM,     "running_lm" },
		{ LD_OP_FIND_FREE_LOCK, "find_free_lock" },
		{ LD_OP_KILL_VG,        "kill_vg" },
		{ LD_OP_DROP_VG,        "drop_vg" },
		{ LD_OP_DUMP_LOG,       "dump_log" },
		{ LD_OP_DUMP_INFO,      "dump_info" },
		{ LD_OP_BUSY,           "busy" },
	};
	unsigned int i;

	for (i = 0; i < sizeof(op_names) / sizeof(op_names[0]); i++) {
		if (op_names[i].op == x)
			return op_names[i].str;
	}
	return "op_unknown";
}
/* Lock mode (LD_LK_*) as a short display string. */
static const char *mode_str(int x)
{
	if (x == LD_LK_IV)
		return "iv";
	if (x == LD_LK_UN)
		return "un";
	if (x == LD_LK_NL)
		return "nl";
	if (x == LD_LK_SH)
		return "sh";
	if (x == LD_LK_EX)
		return "ex";
	return ".";
}
/*
 * Copy the substring after the final ':' in args_in into last (bounded
 * by MAX_ARGS).  Returns 0 on success, -1 if args_in is NULL, empty,
 * or contains no ':'.
 *
 * Improvement: use strchr for the single-character search instead of
 * strstr, and track only the last colon found.
 */
int last_string_from_args(char *args_in, char *last)
{
	const char *p = args_in;
	const char *last_colon = NULL;
	const char *colon;

	while (p && *p) {
		if (!(colon = strchr(p, ':')))
			break;
		last_colon = colon;
		p = colon + 1;
	}

	if (!last_colon)
		return -1;

	snprintf(last, MAX_ARGS, "%s", last_colon + 1);
	return 0;
}
/*
 * Parse "major.minor.patch[:rest]" from the start of args, writing each
 * component (via atoi) into any non-NULL output pointer.  Returns 0 on
 * success, -1 if the two dots are not found.
 *
 * Improvement: use strchr for single-character searches instead of
 * strstr.
 */
int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch)
{
	char version[MAX_ARGS + 1];
	char *major_str, *minor_str, *patch_str;
	char *n, *d1, *d2;

	/* Work on a bounded, NUL-terminated copy. */
	memset(version, 0, sizeof(version));
	strncpy(version, args, MAX_ARGS);
	version[MAX_ARGS] = '\0';

	/* Truncate at ':' so only the version field is parsed. */
	n = strchr(version, ':');
	if (n)
		*n = '\0';

	d1 = strchr(version, '.');
	if (!d1)
		return -1;

	d2 = strchr(d1 + 1, '.');
	if (!d2)
		return -1;

	major_str = version;
	minor_str = d1 + 1;
	patch_str = d2 + 1;

	*d1 = '\0';
	*d2 = '\0';

	if (major)
		*major = atoi(major_str);
	if (minor)
		*minor = atoi(minor_str);
	if (patch)
		*patch = atoi(patch_str);

	return 0;
}
/*
* These are few enough that arrays of function pointers can
* be avoided.
*/
/*
 * Dispatch lockspace preparation to the configured lock manager.
 * Records the raw lock manager return in act->lm_rv when act is given.
 * Returns -1 for an unknown lm_type.
 */
static int lm_prepare_lockspace(struct lockspace *ls, struct action *act)
{
	int rv;

	switch (ls->lm_type) {
	case LD_LM_DLM:
		rv = lm_prepare_lockspace_dlm(ls);
		break;
	case LD_LM_SANLOCK:
		rv = lm_prepare_lockspace_sanlock(ls);
		break;
	default:
		return -1;
	}

	if (act)
		act->lm_rv = rv;
	return rv;
}
/*
 * Dispatch lockspace join to the configured lock manager.
 * Records the raw lock manager return in act->lm_rv when act is given.
 * Returns -1 for an unknown lm_type.
 */
static int lm_add_lockspace(struct lockspace *ls, struct action *act, int adopt)
{
	int rv;

	switch (ls->lm_type) {
	case LD_LM_DLM:
		rv = lm_add_lockspace_dlm(ls, adopt);
		break;
	case LD_LM_SANLOCK:
		rv = lm_add_lockspace_sanlock(ls, adopt);
		break;
	default:
		return -1;
	}

	if (act)
		act->lm_rv = rv;
	return rv;
}
/*
 * Dispatch lockspace leave to the configured lock manager.
 * Records the raw lock manager return in act->lm_rv when act is given.
 * Returns -1 for an unknown lm_type.
 */
static int lm_rem_lockspace(struct lockspace *ls, struct action *act, int free_vg)
{
	int rv;

	switch (ls->lm_type) {
	case LD_LM_DLM:
		rv = lm_rem_lockspace_dlm(ls, free_vg);
		break;
	case LD_LM_SANLOCK:
		rv = lm_rem_lockspace_sanlock(ls, free_vg);
		break;
	default:
		return -1;
	}

	if (act)
		act->lm_rv = rv;
	return rv;
}
/*
 * Dispatch a lock acquisition to the configured lock manager.  retry is
 * only consulted by sanlock.  Records the raw lock manager return in
 * act->lm_rv when act is given.  Returns -1 for an unknown lm_type.
 */
static int lm_lock(struct lockspace *ls, struct resource *r, int mode, struct action *act,
		   struct val_blk *vb_out, int *retry, int adopt)
{
	int rv;

	switch (ls->lm_type) {
	case LD_LM_DLM:
		rv = lm_lock_dlm(ls, r, mode, vb_out, adopt);
		break;
	case LD_LM_SANLOCK:
		rv = lm_lock_sanlock(ls, r, mode, vb_out, retry, adopt);
		break;
	default:
		return -1;
	}

	if (act)
		act->lm_rv = rv;
	return rv;
}
/*
 * Dispatch a lock mode conversion to the configured lock manager.
 * Records the raw lock manager return in act->lm_rv when act is given.
 * Returns -1 for an unknown lm_type.
 */
static int lm_convert(struct lockspace *ls, struct resource *r,
		      int mode, struct action *act, uint32_t r_version)
{
	int rv;

	switch (ls->lm_type) {
	case LD_LM_DLM:
		rv = lm_convert_dlm(ls, r, mode, r_version);
		break;
	case LD_LM_SANLOCK:
		rv = lm_convert_sanlock(ls, r, mode, r_version);
		break;
	default:
		return -1;
	}

	if (act)
		act->lm_rv = rv;
	return rv;
}
/*
 * Dispatch a lock release to the configured lock manager.
 * Records the raw lock manager return in act->lm_rv when act is given.
 * Returns -1 for an unknown lm_type.
 */
static int lm_unlock(struct lockspace *ls, struct resource *r, struct action *act,
		     uint32_t r_version, uint32_t lmu_flags)
{
	int rv;

	switch (ls->lm_type) {
	case LD_LM_DLM:
		rv = lm_unlock_dlm(ls, r, r_version, lmu_flags);
		break;
	case LD_LM_SANLOCK:
		rv = lm_unlock_sanlock(ls, r, r_version, lmu_flags);
		break;
	default:
		return -1;
	}

	if (act)
		act->lm_rv = rv;
	return rv;
}
/*
 * Dispatch the host-count query to the configured lock manager.
 * Returns -1 for an unknown lm_type.
 */
static int lm_hosts(struct lockspace *ls, int notify)
{
	switch (ls->lm_type) {
	case LD_LM_DLM:
		return lm_hosts_dlm(ls, notify);
	case LD_LM_SANLOCK:
		return lm_hosts_sanlock(ls, notify);
	default:
		return -1;
	}
}
/*
 * Dispatch resource teardown to the configured lock manager.
 * Unknown lm_type is silently ignored (matches original behavior).
 */
static void lm_rem_resource(struct lockspace *ls, struct resource *r)
{
	switch (ls->lm_type) {
	case LD_LM_DLM:
		lm_rem_resource_dlm(ls, r);
		break;
	case LD_LM_SANLOCK:
		lm_rem_resource_sanlock(ls, r);
		break;
	default:
		break;
	}
}
/*
 * Find a free on-disk lock area.  Only sanlock has lock areas to
 * search; dlm returns 0 with nothing to do.  Returns -1 for an
 * unknown lm_type.
 */
static int lm_find_free_lock(struct lockspace *ls, uint64_t *free_offset)
{
	switch (ls->lm_type) {
	case LD_LM_DLM:
		return 0;
	case LD_LM_SANLOCK:
		return lm_find_free_lock_sanlock(ls, free_offset);
	default:
		return -1;
	}
}
/*
* While adopting locks, actions originate from the adopt_locks()
* function, not from a client. So, these actions (flagged ADOPT),
* should be passed back to the adopt_locks() function through the
* adopt_results list, and not be sent back to a client via the
* client_list/client_thread.
*/
/*
 * Route a completed action back toward its origin: internal actions
 * (LD_AF_NO_CLIENT) are logged and freed; adopt actions (LD_AF_ADOPT)
 * go to adopt_results for adopt_locks(); everything else goes to
 * client_results for the client_thread, which is then signalled.
 */
static void add_client_result(struct action *act)
{
	struct list_head *dest;

	if (act->flags & LD_AF_NO_CLIENT) {
		log_debug("internal action done op %s mode %s result %d vg %s",
			  op_str(act->op), mode_str(act->mode), act->result, act->vg_name);
		free_action(act);
		return;
	}

	dest = (act->flags & LD_AF_ADOPT) ? &adopt_results : &client_results;

	pthread_mutex_lock(&client_mutex);
	list_add_tail(&act->list, dest);
	pthread_cond_signal(&client_cond);
	pthread_mutex_unlock(&client_mutex);
}
/*
 * Return the lock on r held by client_id, or NULL if that client
 * holds no lock on this resource.
 */
static struct lock *find_lock_client(struct resource *r, uint32_t client_id)
{
	struct lock *lk;

	list_for_each_entry(lk, &r->locks, list) {
		if (lk->client_id != client_id)
			continue;
		return lk;
	}
	return NULL;
}
/*
 * Return the persistent (LD_LF_PERSISTENT) lock on r, or NULL if
 * none exists.
 */
static struct lock *find_lock_persistent(struct resource *r)
{
	struct lock *lk;

	list_for_each_entry(lk, &r->locks, list) {
		if (!(lk->flags & LD_LF_PERSISTENT))
			continue;
		return lk;
	}
	return NULL;
}
/*
 * Return the first pending action on r from client_id, or NULL if
 * that client has no outstanding action on this resource.
 */
static struct action *find_action_client(struct resource *r, uint32_t client_id)
{
	struct action *act;

	list_for_each_entry(act, &r->actions, list) {
		if (act->client_id == client_id)
			return act;
	}
	return NULL;
}
/*
 * Queue an action for the worker_thread and wake it.  If the worker
 * is stopping, the action is not queued (matching original behavior).
 */
static void add_work_action(struct action *act)
{
	pthread_mutex_lock(&worker_mutex);
	if (worker_stop) {
		pthread_mutex_unlock(&worker_mutex);
		return;
	}
	list_add_tail(&act->list, &worker_list);
	pthread_cond_signal(&worker_cond);
	pthread_mutex_unlock(&worker_mutex);
}
static int res_lock(struct lockspace *ls, struct resource *r, struct action *act, int *retry)
{
struct lock *lk;
struct val_blk vb;
uint32_t new_version = 0;
int inval_meta;
int rv = 0;
memset(&vb, 0, sizeof(vb));
r->last_client_id = act->client_id;
if (r->type == LD_RT_LV)
log_debug("S %s R %s res_lock cl %u mode %s (%s)", ls->name, r->name, act->client_id, mode_str(act->mode), act->lv_name);
else
log_debug("S %s R %s res_lock cl %u mode %s", ls->name, r->name, act->client_id, mode_str(act->mode));
if (r->mode == LD_LK_SH && act->mode == LD_LK_SH)
goto add_lk;
if (r->type == LD_RT_LV && act->lv_args[0])
memcpy(r->lv_args, act->lv_args, MAX_ARGS);
rv = lm_lock(ls, r, act->mode, act, &vb, retry, act->flags & LD_AF_ADOPT);
if (r->use_vb)
log_debug("S %s R %s res_lock rv %d read vb %x %x %u",
ls->name, r->name, rv, vb.version, vb.flags, vb.r_version);
else
log_debug("S %s R %s res_lock rv %d", ls->name, r->name, rv);
if (rv < 0)
return rv;
if (sanlock_gl_dup && ls->sanlock_gl_enabled)
act->flags |= LD_AF_DUP_GL_LS;
/*
* Check new lvb values to decide if lvmetad cache should
* be invalidated. When we need to invalidate the lvmetad
* cache, but don't have a usable r_version from the lvb,
* send lvmetad new_version 0 which causes it to invalidate
* the VG metadata without comparing against the currently
* cached VG seqno.
*/
inval_meta = 0;
if (!r->use_vb) {
/* LV locks don't use an lvb. */
} else if (vb.version && ((vb.version & 0xFF00) > (VAL_BLK_VERSION & 0xFF00))) {
log_error("S %s R %s res_lock invalid val_blk version %x flags %x r_version %u",
ls->name, r->name, vb.version, vb.flags, vb.r_version);
inval_meta = 1;
new_version = 0;
rv = -EINVAL;
} else if (vb.r_version && (vb.r_version == r->version)) {
/*
* Common case when the version hasn't changed.
* Do nothing.
*/
} else if (r->version && vb.r_version && (vb.r_version > r->version)) {
/*
* Common case when the version has changed. Another host
* has changed the data protected by the lock since we last
* acquired it, and increased r_version so we know that our
* cache is invalid.
*/
log_debug("S %s R %s res_lock got version %u our %u",
ls->name, r->name, vb.r_version, r->version);
r->version = vb.r_version;
new_version = vb.r_version;
r->version_zero_valid = 0;
inval_meta = 1;
} else if (r->version_zero_valid && !vb.r_version) {
/*
* The lvb is in a persistent zero state, which will end
* once someone uses the lock and writes a new lvb value.
* Do nothing.
*/
log_debug("S %s R %s res_lock version_zero_valid still zero", ls->name, r->name);
} else if (r->version_zero_valid && vb.r_version) {
/*
* Someone has written to the lvb after it was in a
* persistent zero state. Begin tracking normal
* non-zero changes. We may or may not have known
* about a previous non-zero version (in r->version).
* If we did, it means the lvb content was lost and
* has now been reinitialized.
*
* If the new reinitialized value is less than the
* previous non-zero value in r->version, then something
* unusual has happened. For a VG lock, it probably
* means the VG was removed and recreated. Invalidate
* our cache and begin using the new VG version. For
* a GL lock, another host may have reinitialized a
* lost/zero lvb with a value less than we'd seen
* before. Invalidate the cache, and begin using
* the lower version (or continue using our old
* larger version?)
*/
if (r->version && (r->version >= vb.r_version)) {
log_debug("S %s R %s res_lock version_zero_valid got version %u less than our %u",
ls->name, r->name, vb.r_version, r->version);
new_version = 0;
} else {
log_debug("S %s R %s res_lock version_zero_valid got version %u our %u",
ls->name, r->name, vb.r_version, r->version);
new_version = vb.r_version;
}
r->version = vb.r_version;
r->version_zero_valid = 0;
inval_meta = 1;
} else if (!r->version && vb.r_version) {
/*
* The first time we've acquired the lock and seen the lvb.
*/
log_debug("S %s R %s res_lock initial version %u", ls->name, r->name, vb.r_version);
r->version = vb.r_version;
inval_meta = 1;
new_version = vb.r_version;
r->version_zero_valid = 0;
} else if (!r->version && !vb.r_version) {
/*
* The lock may have never been used to change something.
* (e.g. a new sanlock GL?)
*/
log_debug("S %s R %s res_lock all versions zero", ls->name, r->name);
if (!r->version_zero_valid) {
inval_meta = 1;
new_version = 0;
}
r->version_zero_valid = 1;
} else if (r->version && !vb.r_version) {
/*
* The lvb content has been lost or never been initialized.
* It can be lost during dlm recovery when the master node
* is removed.
*
* If we're the next to write the lvb, reinitialze it to the
* new VG seqno, or a new GL counter larger than was seen by
* any hosts before (how to estimate that?)
*
* If we see non-zero values before we next write to it, use
* those values.
*
* While the lvb values remain zero, the data for the lock
* is unchanged and we don't need to invalidate metadata.
*/
if ((ls->lm_type == LD_LM_DLM) && !vb.version && !vb.flags)
log_debug("S %s R %s res_lock all lvb content is blank",
ls->name, r->name);
log_debug("S %s R %s res_lock our version %u got vb %x %x %u",
ls->name, r->name, r->version, vb.version, vb.flags, vb.r_version);
r->version_zero_valid = 1;
inval_meta = 1;
new_version = 0;
} else if (r->version && vb.r_version && (vb.r_version < r->version)) {
/*
* The lvb value has gone backwards, which shouldn't generally happen,
* but could when the dlm lvb is lost and reinitialized, or the VG
* is removed and recreated.
*
* If this is a VG lock, it probably means the VG has been removed
* and recreated while we had the dlm lockspace running.
* FIXME: how does the cache validation and replacement in lvmetad
* work in this case?
*/
log_debug("S %s R %s res_lock got version %u less than our version %u",
ls->name, r->name, vb.r_version, r->version);
r->version = vb.r_version;
inval_meta = 1;
new_version = 0;
r->version_zero_valid = 0;
} else {
log_debug("S %s R %s res_lock undefined vb condition vzv %d our version %u vb %x %x %u",
ls->name, r->name, r->version_zero_valid, r->version,
vb.version, vb.flags, vb.r_version);
}
if (vb.version && vb.r_version && (vb.flags & VBF_REMOVED)) {
/* Should we set ls->thread_stop = 1 ? */
log_debug("S %s R %s res_lock vb flag REMOVED",
ls->name, r->name);
rv = -EREMOVED;
}
if (!lvmetad_connected && inval_meta)
log_debug("S %s R %s res_lock no lvmetad connection to invalidate",
ls->name, r->name);
/*
* r is vglk: tell lvmetad to set the vg invalid
* flag, and provide the new r_version. If lvmetad finds
* that its cached vg has seqno less than the value
* we send here, it will set the vg invalid flag.
* lvm commands that read the vg from lvmetad, will
* see the invalid flag returned, will reread the
* vg from disk, update the lvmetad copy, and go on.
*
* r is global: tell lvmetad to set the global invalid
* flag. When commands see this flag returned from lvmetad,
* they will reread metadata from disk, update the lvmetad
* caches, and tell lvmetad to set global invalid to 0.
*/
if (lvmetad_connected && inval_meta && (r->type == LD_RT_VG)) {
daemon_reply reply;
char *uuid;
log_debug("S %s R %s res_lock set lvmetad vg version %u",
ls->name, r->name, new_version);
if (!ls->vg_uuid[0] || !strcmp(ls->vg_uuid, "none"))
uuid = (char *)"none";
else
uuid = ls->vg_uuid;
pthread_mutex_lock(&lvmetad_mutex);
reply = daemon_send_simple(lvmetad_handle, "set_vg_info",
"token = %s", "skip",
"uuid = %s", uuid,
"name = %s", ls->vg_name,
"version = %d", (int)new_version,
NULL);
pthread_mutex_unlock(&lvmetad_mutex);
if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK"))
log_error("set_vg_info in lvmetad failed %d", reply.error);
daemon_reply_destroy(reply);
}
if (lvmetad_connected && inval_meta && (r->type == LD_RT_GL)) {
daemon_reply reply;
log_debug("S %s R %s res_lock set lvmetad global invalid",
ls->name, r->name);
pthread_mutex_lock(&lvmetad_mutex);
reply = daemon_send_simple(lvmetad_handle, "set_global_info",
"token = %s", "skip",
"global_invalid = %d", 1,
NULL);
pthread_mutex_unlock(&lvmetad_mutex);
if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK"))
log_error("set_global_info in lvmetad failed %d", reply.error);
daemon_reply_destroy(reply);
}
/*
* Record the new lock state.
*/
r->mode = act->mode;
add_lk:
if (r->mode == LD_LK_SH)
r->sh_count++;
if (!(lk = alloc_lock()))
return -ENOMEM;
lk->client_id = act->client_id;
lk->mode = act->mode;
if (act->flags & LD_AF_PERSISTENT) {
lk->flags |= LD_LF_PERSISTENT;
lk->client_id = 0;
}
/*
* LV_LOCK means the action acquired the lv lock in the lock manager
* (as opposed to finding that the lv lock was already held). If
* the client for this LV_LOCK action fails before we send the result,
* then we automatically unlock the lv since the lv wasn't activated.
* (There will always be an odd chance the lv lock is held while the
* lv is not active, but this helps.) The most common case where this
* is helpful is when the lv lock operation is slow/delayed and the
* command is canceled by the user.
*
* LV_UNLOCK means the lv unlock action was generated by lvmlockd when
* it tried to send the reply for an lv lock action (with LV_LOCK set),
* and failed to send the reply to the client/command. The
* last_client_id saved on the resource is compared to this LV_UNLOCK
* action before the auto unlock is done in case another action locked
* the lv between the failed client lock action and the auto unlock.
*/
if (r->type == LD_RT_LV)
act->flags |= LD_AF_LV_LOCK;
list_add_tail(&lk->list, &r->locks);
return rv;
}
/*
 * Convert the mode of an existing lock (sh <-> ex) on r held by lk,
 * on behalf of the action act.
 *
 * Returns 0 on success, -EAGAIN if an ex request conflicts with other
 * shared holders (caller may retry), or an error from lm_convert().
 */
static int res_convert(struct lockspace *ls, struct resource *r,
		       struct lock *lk, struct action *act)
{
	uint32_t r_version;
	int rv;

	r->last_client_id = act->client_id;

	log_debug("S %s R %s res_convert cl %u mode %d", ls->name, r->name, act->client_id, act->mode);

	/*
	 * A sh->ex convert is only possible when this lk is the sole
	 * remaining sh holder of r.
	 */
	if (act->mode == LD_LK_EX && lk->mode == LD_LK_SH && r->sh_count > 1)
		return -EAGAIN;

	/*
	 * lm_convert() writes new version (from ex)
	 * Same as lm_unlock()
	 */

	if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
		/* global lock: version is bumped on every ex release/convert */
		r->version++;
		lk->version = r->version;
		r_version = r->version;
		log_debug("S %s R %s res_convert r_version inc %u",
			  ls->name, r->name, r_version);

	} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk->version > r->version)) {
		/* vg lock: a newer version was set by a preceding update action */
		r->version = lk->version;
		r_version = r->version;
		log_debug("S %s R %s res_convert r_version new %u", ls->name, r->name, r_version);
	} else {
		/* no version change to write to the lock manager */
		r_version = 0;
	}

	rv = lm_convert(ls, r, act->mode, act, r_version);
	if (rv < 0) {
		/* NOTE(review): r->version was already incremented above even
		 * though the convert failed; presumably harmless since the
		 * version only needs to be monotonic — confirm. */
		log_error("S %s R %s res_convert lm error %d", ls->name, r->name, rv);
		return rv;
	}

	log_debug("S %s R %s res_convert lm done", ls->name, r->name);

	/* adjust the shared-holder count to reflect the new mode */
	if (lk->mode == LD_LK_EX && act->mode == LD_LK_SH) {
		r->sh_count = 1;
	} else if (lk->mode == LD_LK_SH && act->mode == LD_LK_EX) {
		r->sh_count = 0;
	} else {
		/* should not be possible */
		log_error("S %s R %s res_convert invalid modes %d %d",
			  ls->name, r->name, lk->mode, act->mode);
		return -1;
	}

	r->mode = act->mode;
	lk->mode = act->mode;

	return 0;
}
static int res_cancel(struct lockspace *ls, struct resource *r,
struct action *act)
{
struct action *cact;
/*
* a client can cancel its own non-persistent lock requests,
* when could this happen?
*
* a client can cancel other client's persistent lock requests,
* when could this happen?
*/
if (act->flags & LD_AF_PERSISTENT) {
list_for_each_entry(cact, &r->actions, list) {
if (!(cact->flags & LD_AF_PERSISTENT))
continue;
goto do_cancel;
}
} else {
cact = find_action_client(r, act->client_id);
if (cact)
goto do_cancel;
}
return -ENOENT;
do_cancel:
log_debug("S %s R %s res_cancel cl %u", ls->name, r->name, cact->client_id);
cact->result = -ECANCELED;
list_del(&cact->list);
add_client_result(cact);
return -ECANCELED;
}
/*
* lm_unlock() writes new a r_version (from ex)
*
* The r_version of the vg resource is incremented if
* an "update" was received for the vg lock. The update
* contains the new vg seqno from the vg metadata which is
* used as the r_version.
*
* The r_version of the global resource is automatically
* incremented when it is unlocked from ex mode.
*
* r_version is incremented every time a command releases
* the global lock from ex.
*/
/*
* persistent locks will not be unlocked for OP_CLOSE/act_close
* because act_close->flags does not have the PERSISTENT flag
* set, and a persistent lk->client_id is zero, which will not
* match the client in act_close->client_id.
*/
/*
 * Release the lock held on r by the client (or the persistent lock),
 * writing a new resource version to the lock manager when an ex lock
 * is dropped.  For shared locks, the lock manager is only told to
 * unlock when the last sh holder goes away.
 *
 * Returns 0 on success, -ENOENT if no matching lock was found (or the
 * auto LV unlock no longer applies), or an error from lm_unlock().
 */
static int res_unlock(struct lockspace *ls, struct resource *r,
		      struct action *act)
{
	struct lock *lk;
	uint32_t r_version;
	int rv;

	if (act->flags & LD_AF_PERSISTENT) {
		lk = find_lock_persistent(r);
		if (lk)
			goto do_unlock;
	} else {
		lk = find_lock_client(r, act->client_id);
		if (lk)
			goto do_unlock;
	}

	/* close-generated unlocks routinely find nothing; don't log those */
	if (act->op != LD_OP_CLOSE)
		log_debug("S %s R %s res_unlock cl %u no locks", ls->name, r->name, act->client_id);
	return -ENOENT;

do_unlock:
	/*
	 * An auto-generated unlock for a failed client is only valid while
	 * no other client has locked the lv since that client's lock.
	 */
	if ((act->flags & LD_AF_LV_UNLOCK) && (r->last_client_id != act->client_id)) {
		log_debug("S %s R %s res_unlock cl %u for failed client ignored, last client %u",
			  ls->name, r->name, act->client_id, r->last_client_id);
		return -ENOENT;
	}

	r->last_client_id = act->client_id;

	if (act->op == LD_OP_CLOSE)
		log_debug("S %s R %s res_unlock cl %u from close", ls->name, r->name, act->client_id);
	else if (r->type == LD_RT_LV)
		log_debug("S %s R %s res_unlock cl %u (%s)", ls->name, r->name, act->client_id, act->lv_name);
	else
		log_debug("S %s R %s res_unlock cl %u", ls->name, r->name, act->client_id);

	/* send unlock to lm when last sh lock is unlocked */
	if (lk->mode == LD_LK_SH) {
		r->sh_count--;
		if (r->sh_count > 0) {
			log_debug("S %s R %s res_unlock sh_count %u", ls->name, r->name, r->sh_count);
			goto rem_lk;
		}
	}

	if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
		/* global lock version is bumped on every release from ex */
		r->version++;
		lk->version = r->version;
		r_version = r->version;
		r->version_zero_valid = 0;

		log_debug("S %s R %s res_unlock r_version inc %u", ls->name, r->name, r_version);

	} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk->version > r->version)) {
		/* vg lock version was set by res_update() from the new vg seqno */
		r->version = lk->version;
		r_version = r->version;
		r->version_zero_valid = 0;

		log_debug("S %s R %s res_unlock r_version new %u",
			  ls->name, r->name, r_version);
	} else {
		/* no version change to write to the lock manager */
		r_version = 0;
	}

	rv = lm_unlock(ls, r, act, r_version, 0);
	if (rv < 0) {
		/* should never happen, retry? */
		log_error("S %s R %s res_unlock lm error %d", ls->name, r->name, rv);
		return rv;
	}

	log_debug("S %s R %s res_unlock lm done", ls->name, r->name);

rem_lk:
	list_del(&lk->list);
	free_lock(lk);

	/* the resource is fully unlocked once no lock records remain */
	if (list_empty(&r->locks))
		r->mode = LD_LK_UN;

	return 0;
}
/*
 * Record a new version on the client's ex lock; the version is written
 * to the lock manager later, when the lock is unlocked or converted.
 *
 * Returns 0 on success, -ENOENT if the client holds no lock on r,
 * -EINVAL if r is not held ex.
 */
static int res_update(struct lockspace *ls, struct resource *r,
		      struct action *act)
{
	struct lock *lk;

	if (!(lk = find_lock_client(r, act->client_id))) {
		log_error("S %s R %s res_update cl %u lock not found",
			  ls->name, r->name, act->client_id);
		return -ENOENT;
	}

	if (r->mode != LD_LK_EX) {
		log_error("S %s R %s res_update cl %u version on non-ex lock",
			  ls->name, r->name, act->client_id);
		return -EINVAL;
	}

	/* lk version will be written to lm by unlock */
	lk->version = (act->flags & LD_AF_NEXT_VERSION) ? r->version + 1 : act->version;

	log_debug("S %s R %s res_update cl %u lk version to %u", ls->name, r->name, act->client_id, lk->version);

	return 0;
}
/*
 * Free lock-manager allocation for a removed LV.  Only sanlock has
 * on-disk lease space to deallocate; a dlm LV lock is simply dropped
 * by rem_resource, so there is nothing to free here.
 */
static int free_lv(struct lockspace *ls, struct resource *r)
{
	switch (ls->lm_type) {
	case LD_LM_SANLOCK:
		return lm_free_lv_sanlock(ls, r);
	case LD_LM_DLM:
		return 0;
	default:
		return -EINVAL;
	}
}
/*
 * Enable or disable the sanlock global lock in this lockspace.
 *
 * NB. this rewrites the resource on disk, so it must never be done
 * while sanlock holds any locks on it; we'd be changing the resource
 * from under sanlock and could confuse or break it badly.  We don't
 * know what another host is doing, so these must be used very
 * carefully.
 */
static int res_able(struct lockspace *ls, struct resource *r,
		    struct action *act)
{
	if (ls->lm_type != LD_LM_SANLOCK) {
		log_error("enable/disable only applies to sanlock");
		return -EINVAL;
	}

	if (r->type != LD_RT_GL) {
		log_error("enable/disable only applies to global lock");
		return -EINVAL;
	}

	if (r->mode != LD_LK_UN) {
		log_error("enable/disable only allowed on unlocked resource");
		return -EINVAL;
	}

	/* only one sanlock lockspace may host the global lock at a time */
	if (act->op == LD_OP_ENABLE && gl_lsname_sanlock[0]) {
		log_error("disable global lock in %s before enable in %s",
			  gl_lsname_sanlock, ls->name);
		return -EINVAL;
	}

	if ((act->op == LD_OP_DISABLE) && (act->flags & LD_AF_EX_DISABLE))
		return lm_ex_disable_gl_sanlock(ls);

	return lm_able_gl_sanlock(ls, act->op == LD_OP_ENABLE);
}
/*
* Go through queued actions, and make lock/unlock calls on the resource
* based on the actions and the existing lock state.
*
* All lock operations sent to the lock manager are non-blocking.
* This is because sanlock does not support lock queueing.
* Eventually we could enhance this to take advantage of lock
* queueing when available (i.e. for the dlm).
*
* act_close_list: list of CLOSE actions, identifying clients that have
* closed/terminated their lvmlockd connection, and whose locks should
* be released. Do not remove these actions from act_close_list.
*
* retry_out: set to 1 if the lock manager said we should retry,
* meaning we should call res_process() again in a short while to retry.
*/
static void res_process(struct lockspace *ls, struct resource *r,
			struct list_head *act_close_list, int *retry_out)
{
	struct action *act, *safe, *act_close;
	struct lock *lk;
	int lm_retry;
	int rv;

	/*
	 * handle version updates for ex locks
	 * (new version will be written by unlock)
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_UPDATE) {
			rv = res_update(ls, r, act);
			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * handle explicit unlock actions
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if ((act->op == LD_OP_LOCK) &&
		    (act->mode == LD_LK_IV || act->mode == LD_LK_NL)) {
			/* IV/NL are never valid requested modes */
			act->result = -EINVAL;
			list_del(&act->list);
			add_client_result(act);
		}

		if (act->op == LD_OP_LOCK && act->mode == LD_LK_UN) {
			rv = res_unlock(ls, r, act);

			/* nothing was unlocked; the caller may have asked us to
			   cancel a queued request instead */
			if (rv == -ENOENT && (act->flags & LD_AF_UNLOCK_CANCEL))
				rv = res_cancel(ls, r, act);

			/*
			 * possible unlock results:
			 * 0: unlock succeeded
			 * -ECANCELED: cancel succeeded
			 * -ENOENT: nothing to unlock or cancel
			 */

			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * handle implicit unlocks due to client exit,
	 * also clear any outstanding actions for the client
	 */

	list_for_each_entry(act_close, act_close_list, list) {
		/* results are deliberately ignored; close actions remain on
		   act_close_list so other resources can process them too */
		res_unlock(ls, r, act_close);
		res_cancel(ls, r, act_close);
	}

	/*
	 * handle freeing a lock for an lv that has been removed
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_FREE && act->rt == LD_RT_LV) {
			log_debug("S %s R %s free_lv", ls->name, r->name);
			rv = free_lv(ls, r);
			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
			goto r_free;
		}
	}

	/*
	 * handle enable/disable
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_ENABLE || act->op == LD_OP_DISABLE) {
			rv = res_able(ls, r, act);
			act->result = rv;
			list_del(&act->list);
			add_client_result(act);

			if (!rv && act->op == LD_OP_DISABLE) {
				log_debug("S %s R %s free disabled", ls->name, r->name);
				goto r_free;
			}
		}
	}

	/*
	 * transient requests on existing transient locks
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->flags & LD_AF_PERSISTENT)
			continue;

		lk = find_lock_client(r, act->client_id);
		if (!lk)
			continue;

		if (lk->mode != act->mode) {
			/* convert below */
			/*
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
			*/
			continue;
		} else {
			/* success */
			r->last_client_id = act->client_id;
			act->result = -EALREADY;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * persistent requests on existing persistent locks
	 *
	 * persistent locks are not owned by a client, so any
	 * existing with matching mode satisfies a request.
	 * only one persistent lock is kept on a resource.
	 * a single "unowned" persistent lock satisfies
	 * any/multiple client requests for a persistent lock.
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (!(act->flags & LD_AF_PERSISTENT))
			continue;

		lk = find_lock_persistent(r);
		if (!lk)
			continue;

		if (lk->mode != act->mode) {
			/* convert below */
			/*
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
			*/
			continue;
		} else {
			/* success */
			r->last_client_id = act->client_id;
			act->result = -EALREADY;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * transient requests with existing persistent locks
	 *
	 * Just grant the transient request and do not
	 * keep a record of it.  Assume that the persistent
	 * lock will not go away while the transient lock
	 * is needed.
	 *
	 * This would be used when an ex, persistent lv lock
	 * exists from activation, and then something like
	 * lvextend asks for a transient ex lock to change
	 * the lv.  The lv could not be unlocked by deactivation
	 * while the lvextend was running.
	 *
	 * The logic here for mixing T/P locks is not general
	 * support; there are a number of cases where it will
	 * not work: updating version number (lv locks have
	 * none), ex locks from multiple clients will not
	 * conflict, explicit un of the transient lock will fail.
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->flags & LD_AF_PERSISTENT)
			continue;

		lk = find_lock_persistent(r);
		if (!lk)
			continue;

		if ((lk->mode == LD_LK_EX) ||
		    (lk->mode == LD_LK_SH && act->mode == LD_LK_SH)) {
			r->last_client_id = act->client_id;
			act->result = 0;
			list_del(&act->list);
			add_client_result(act);
		} else {
			/* persistent lock is sh, transient request is ex */
			/* FIXME: can we remove this case? do a convert here? */
			log_debug("res_process %s existing persistent lock new transient", r->name);
			r->last_client_id = act->client_id;
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * persistent requests with existing transient locks
	 *
	 * If a client requests a P (persistent) lock for a T (transient)
	 * lock it already holds, we can just change T to P.  Fail if the
	 * same happens for locks from different clients.  Changing
	 * another client's lock from T to P may cause problems
	 * if that client tries to unlock or update version.
	 *
	 * I don't think this P/T combination will be used.
	 * It might be used if a command was able to take a P
	 * vg lock, in which case the T vg lock would already
	 * be held for reading.  If the T lock was sh, it would
	 * be converted to P ex.  If the T/P modes matched, the
	 * lock could just be changed from T to P.
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (!(act->flags & LD_AF_PERSISTENT))
			continue;

		lk = find_lock_client(r, act->client_id);
		if (!lk)
			continue;

		if (lk->mode != act->mode) {
			/* FIXME: convert and change to persistent? */
			log_debug("res_process %s existing transient lock new persistent", r->name);
			r->last_client_id = act->client_id;
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
		} else {
			/* promote the client's own transient lock to persistent */
			r->last_client_id = act->client_id;
			lk->flags |= LD_LF_PERSISTENT;
			lk->client_id = 0;
			act->result = 0;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * convert mode of existing locks
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->flags & LD_AF_PERSISTENT)
			lk = find_lock_persistent(r);
		else
			lk = find_lock_client(r, act->client_id);
		if (!lk)
			continue;

		if (lk->mode == act->mode) {
			/* should never happen, should be found above */
			log_error("convert same mode");
			continue;
		}

		/* convert fails immediately, no EAGAIN retry */
		rv = res_convert(ls, r, lk, act);
		act->result = rv;
		list_del(&act->list);
		add_client_result(act);
	}

	/*
	 * Cases above are all requests addressed by existing locks.
	 * Below handles the rest.  Transient and persistent are
	 * handled the same, except
	 * - if mode of existing lock is incompat with requested,
	 *   leave the act on r->actions
	 * - if r mode is EX, any lock action is blocked, just quit
	 *
	 * Retry a lock request that fails due to a lock conflict (-EAGAIN):
	 * if we have not exceeded max retries and lm sets lm_retry (sanlock
	 * transient conflicts from shared lock implementation), or r type
	 * is gl or vg (transient real conflicts we want to hide from command).
	 * lv lock conflicts won't be transient so don't retry them.
	 */

	if (r->mode == LD_LK_EX)
		return;

	/*
	 * r mode is SH or UN, pass lock-sh actions to lm
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		/* grant in order, so break here */
		if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX)
			break;

		if (act->op == LD_OP_LOCK && act->mode == LD_LK_SH) {
			lm_retry = 0;

			rv = res_lock(ls, r, act, &lm_retry);
			if ((rv == -EAGAIN) &&
			    (act->retries <= act->max_retries) &&
			    (lm_retry || (r->type != LD_RT_LV))) {
				/* leave act on list */
				log_debug("S %s R %s res_lock EAGAIN retry", ls->name, r->name);
				act->retries++;
				*retry_out = 1;
			} else {
				act->result = rv;
				list_del(&act->list);
				add_client_result(act);
			}
			/* lock manager is gone; the resource must be dropped */
			if (rv == -EUNATCH)
				goto r_free;
		}
	}

	/*
	 * r mode is SH, any ex lock action is blocked, just quit
	 */

	if (r->mode == LD_LK_SH)
		return;

	/*
	 * r mode is UN, pass lock-ex action to lm
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX) {
			lm_retry = 0;

			rv = res_lock(ls, r, act, &lm_retry);
			if ((rv == -EAGAIN) &&
			    (act->retries <= act->max_retries) &&
			    (lm_retry || (r->type != LD_RT_LV))) {
				/* leave act on list */
				log_debug("S %s R %s res_lock EAGAIN retry", ls->name, r->name);
				act->retries++;
				*retry_out = 1;
			} else {
				act->result = rv;
				list_del(&act->list);
				add_client_result(act);
			}
			/* lock manager is gone; the resource must be dropped */
			if (rv == -EUNATCH)
				goto r_free;
			/* only one ex lock can be granted; stop here */
			break;
		}
	}

	return;

r_free:
	/* For the EUNATCH case it may be possible there are queued actions? */
	list_for_each_entry_safe(act, safe, &r->actions, list) {
		log_error("S %s R %s res_process r_free cancel %s client %d",
			  ls->name, r->name, op_str(act->op), act->client_id);
		act->result = -ECANCELED;
		list_del(&act->list);
		add_client_result(act);
	}
	log_debug("S %s R %s res_process free", ls->name, r->name);
	lm_rem_resource(ls, r);
	list_del(&r->list);
	free_resource(r);
}
#define LOCKS_EXIST_ANY 1
#define LOCKS_EXIST_GL 2
#define LOCKS_EXIST_VG 3
#define LOCKS_EXIST_LV 4
/*
 * Scan every lock in the lockspace; return 1 as soon as one matching
 * the locks_do query (LOCKS_EXIST_*) is found, 0 if none exists.
 */
static int for_each_lock(struct lockspace *ls, int locks_do)
{
	struct resource *r;
	struct lock *lk;

	list_for_each_entry(r, &ls->resources, list) {
		list_for_each_entry(lk, &r->locks, list) {
			switch (locks_do) {
			case LOCKS_EXIST_ANY:
				return 1;
			case LOCKS_EXIST_GL:
				if (r->type == LD_RT_GL)
					return 1;
				break;
			case LOCKS_EXIST_VG:
				if (r->type == LD_RT_VG)
					return 1;
				break;
			case LOCKS_EXIST_LV:
				if (r->type == LD_RT_LV)
					return 1;
				break;
			}
		}
	}

	return 0;
}
/*
 * Forcibly drop all locks and resources in a lockspace that is being
 * stopped, freed (vgremove) or dropped; any actions still queued on a
 * resource are canceled.  Returns the number of locks cleared.
 *
 * free_vg: the vg is being removed, pass LMUF_FREE_VG to lm_unlock.
 * drop_vg: VG access was forcibly terminated, LV locks may be present.
 */
static int clear_locks(struct lockspace *ls, int free_vg, int drop_vg)
{
	struct resource *r, *r_safe;
	struct lock *lk, *lk_safe;
	struct action *act, *act_safe;
	uint32_t lk_version;
	uint32_t r_version;
	int lk_count = 0;
	int rv;

	list_for_each_entry_safe(r, r_safe, &ls->resources, list) {
		lk_version = 0;

		list_for_each_entry_safe(lk, lk_safe, &r->locks, list) {
			lk_count++;

			/*
			 * Stopping a lockspace shouldn't happen with LV locks
			 * still held, but it will be stopped with GL and VG
			 * locks held.  The drop_vg case may see LV locks.
			 */
			if (lk->flags & LD_LF_PERSISTENT && !drop_vg)
				log_error("S %s R %s clear lock persistent", ls->name, r->name);
			else
				log_debug("S %s R %s clear lock mode %s client %d", ls->name, r->name, mode_str(lk->mode), lk->client_id);

			/* keep the newest version across all dropped locks */
			if (lk->version > lk_version)
				lk_version = lk->version;

			list_del(&lk->list);
			free_lock(lk);
		}

		/* nothing held in the lock manager, just free the resource */
		if (r->mode == LD_LK_UN)
			goto r_free;

		if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
			/* global lock version is bumped on every release from ex */
			r->version++;
			r_version = r->version;
			log_debug("S %s R %s clear_locks r_version inc %u",
				  ls->name, r->name, r_version);

		} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk_version > r->version)) {
			/* write the newest version recorded on a dropped lock */
			r->version = lk_version;
			r_version = r->version;
			log_debug("S %s R %s clear_locks r_version new %u",
				  ls->name, r->name, r_version);

		} else {
			r_version = 0;
		}

		rv = lm_unlock(ls, r, NULL, r_version, free_vg ? LMUF_FREE_VG : 0);
		if (rv < 0) {
			/* should never happen */
			log_error("S %s R %s clear_locks free %d drop %d lm unlock error %d",
				  ls->name, r->name, free_vg, drop_vg, rv);
		}

		/* cancel any actions still queued on this resource */
		list_for_each_entry_safe(act, act_safe, &r->actions, list) {
			log_error("S %s R %s clear_locks cancel %s client %d",
				  ls->name, r->name, op_str(act->op), act->client_id);
			act->result = -ECANCELED;
			list_del(&act->list);
			add_client_result(act);
		}
 r_free:
		log_debug("S %s R %s free", ls->name, r->name);
		lm_rem_resource(ls, r);
		list_del(&r->list);
		free_resource(r);
	}

	return lk_count;
}
/*
 * find and return the resource that is referenced by the action
 * - there is a single gl resource per lockspace
 * - there is a single vg resource per lockspace
 * - there can be many lv resources per lockspace, compare names
 *
 * When nocreate is 0 and no matching resource exists, a new one
 * is allocated, named and added to the lockspace; NULL is returned
 * only on allocation failure (or on lookup failure with nocreate).
 */
static struct resource *find_resource_act(struct lockspace *ls,
					  struct action *act,
					  int nocreate)
{
	struct resource *r;

	list_for_each_entry(r, &ls->resources, list) {
		if (r->type != act->rt)
			continue;

		/* gl and vg are singletons within a lockspace */
		if (r->type == LD_RT_GL || r->type == LD_RT_VG)
			return r;

		/* lv resources are distinguished by uuid */
		if (r->type == LD_RT_LV && !strcmp(r->name, act->lv_uuid))
			return r;
	}

	if (nocreate)
		return NULL;

	if (!(r = alloc_resource()))
		return NULL;

	r->type = act->rt;
	r->mode = LD_LK_UN;

	switch (r->type) {
	case LD_RT_GL:
		strncpy(r->name, R_NAME_GL, MAX_NAME);
		r->use_vb = 1;
		break;
	case LD_RT_VG:
		strncpy(r->name, R_NAME_VG, MAX_NAME);
		r->use_vb = 1;
		break;
	case LD_RT_LV:
		strncpy(r->name, act->lv_uuid, MAX_NAME);
		r->use_vb = 0;
		break;
	}

	list_add_tail(&r->list, &ls->resources);
	return r;
}
/*
 * Remove and free every resource in the lockspace, releasing the
 * lock manager's per-resource state for each one.
 */
static void free_ls_resources(struct lockspace *ls)
{
	struct resource *r, *next;

	list_for_each_entry_safe(r, next, &ls->resources, list) {
		lm_rem_resource(ls, r);
		list_del(&r->list);
		free_resource(r);
	}
}
/*
 * ls_rem is the vg being removed that holds the global lock.
 * check if any other sanlock vgs exist, which would be left
 * without a global lock.
 */
static int other_sanlock_vgs_exist(struct lockspace *ls_rem)
{
	struct lockspace *ls;

	list_for_each_entry(ls, &lockspaces, list) {
		if (ls->lm_type != LD_LM_SANLOCK)
			continue;
		if (strcmp(ls->name, ls_rem->name)) {
			log_debug("other sanlock vg exists %s", ls->name);
			return 1;
		}
	}

	return 0;
}
/*
 * Decide whether an op may still be processed after kill_vg.
 * LOCK is the main thing we're interested in; the others are unlikely.
 * Unlock is always allowed so held locks can be released.
 */
static int process_op_during_kill(struct action *act)
{
	if (act->op == LD_OP_LOCK)
		return act->mode == LD_LK_UN;

	switch (act->op) {
	case LD_OP_ENABLE:
	case LD_OP_DISABLE:
	case LD_OP_UPDATE:
	case LD_OP_RENAME_BEFORE:
	case LD_OP_RENAME_FINAL:
	case LD_OP_FIND_FREE_LOCK:
		return 0;
	}

	return 1;
}
/*
* Process actions queued for this lockspace by
* client_recv_action / add_lock_action.
*
* The lockspace_thread can touch its own ls struct without holding
* lockspaces_mutex until it sets ls->thread_done, after which it
* cannot touch ls without holding lockspaces_mutex.
*/
#define LOCK_RETRY_MS 1000 /* milliseconds to delay between retry */
/*
 * Per-lockspace thread: joins the lockspace in the lock manager,
 * then loops processing actions queued on ls->actions until stopped.
 * Lock ops are routed to their resource and handled by res_process();
 * lockspace-management ops are handled inline.  On exit, all locks
 * are cleared, the lockspace is left, and remaining actions are
 * completed with results; worker_thread then joins us and frees ls.
 */
static void *lockspace_thread_main(void *arg_in)
{
	struct lockspace *ls = arg_in;
	struct resource *r, *r2;
	struct action *add_act, *act, *safe;
	struct action *act_op_free = NULL;
	struct list_head tmp_act;
	struct list_head act_close;
	int free_vg = 0;
	int drop_vg = 0;
	int error = 0;
	int adopt_flag = 0;
	int wait_flag = 0;
	int retry;
	int rv;

	INIT_LIST_HEAD(&act_close);

	/* first action may be client add */
	pthread_mutex_lock(&ls->mutex);
	act = NULL;
	add_act = NULL;
	if (!list_empty(&ls->actions)) {
		act = list_first_entry(&ls->actions, struct action, list);
		if (act->op == LD_OP_START) {
			add_act = act;
			list_del(&add_act->list);

			if (add_act->flags & LD_AF_WAIT)
				wait_flag = 1;
			if (add_act->flags & LD_AF_ADOPT)
				adopt_flag = 1;
		}
	}
	pthread_mutex_unlock(&ls->mutex);

	log_debug("S %s lm_add_lockspace %s wait %d adopt %d",
		  ls->name, lm_str(ls->lm_type), wait_flag, adopt_flag);

	/*
	 * The prepare step does not wait for anything and is quick;
	 * it tells us if the parameters are valid and the lm is running.
	 */
	error = lm_prepare_lockspace(ls, add_act);

	if (add_act && (!wait_flag || error)) {
		/* send initial join result back to client */
		add_act->result = error;
		add_client_result(add_act);
		add_act = NULL;
	}

	/*
	 * The actual lockspace join can take a while.
	 */
	if (!error) {
		error = lm_add_lockspace(ls, add_act, adopt_flag);

		log_debug("S %s lm_add_lockspace done %d", ls->name, error);

		/* flag duplicate sanlock global locks so they can be reported */
		if (ls->sanlock_gl_enabled && gl_lsname_sanlock[0] &&
		    strcmp(ls->name, gl_lsname_sanlock))
			sanlock_gl_dup = 1;

		if (add_act) {
			/* send final join result back to client */
			add_act->result = error;
			add_client_result(add_act);
		}
	}

	pthread_mutex_lock(&ls->mutex);
	if (error) {
		ls->thread_stop = 1;
		ls->create_fail = 1;
	} else {
		ls->create_done = 1;
	}
	pthread_mutex_unlock(&ls->mutex);

	if (error)
		goto out_act;

	while (1) {
		pthread_mutex_lock(&ls->mutex);
		while (!ls->thread_work) {
			if (ls->thread_stop) {
				pthread_mutex_unlock(&ls->mutex);
				goto out_rem;
			}
			pthread_cond_wait(&ls->cond, &ls->mutex);
		}

		/*
		 * Process all the actions queued for this lockspace.
		 * The client thread queues actions on ls->actions.
		 *
		 * Here, take all the actions off of ls->actions, and:
		 *
		 * - For lock operations, move the act to r->actions.
		 *   These lock actions/operations processed by res_process().
		 *
		 * - For non-lock operations, e.g. related to managing
		 *   the lockspace, process them in this loop.
		 */

		while (1) {
			if (list_empty(&ls->actions)) {
				ls->thread_work = 0;
				break;
			}

			act = list_first_entry(&ls->actions, struct action, list);

			if (act->op == LD_OP_KILL_VG && act->rt == LD_RT_VG) {
				/* Continue processing until DROP_VG arrives. */
				log_debug("S %s kill_vg", ls->name);
				ls->kill_vg = 1;
				list_del(&act->list);
				act->result = 0;
				add_client_result(act);
				continue;
			}

			if (ls->kill_vg && !process_op_during_kill(act)) {
				log_debug("S %s disallow op %s after kill_vg", ls->name, op_str(act->op));
				list_del(&act->list);
				act->result = -EVGKILLED;
				add_client_result(act);
				continue;
			}

			if (act->op == LD_OP_DROP_VG && act->rt == LD_RT_VG) {
				/*
				 * If leases are released after i/o errors begin
				 * but before lvmlockctl --kill, then the VG is not
				 * killed, but drop is still needed to clean up the
				 * VG, so in that case there would be a drop op without
				 * a preceding kill op.
				 */
				if (!ls->kill_vg)
					log_debug("S %s received drop without kill", ls->name);
				log_debug("S %s drop_vg", ls->name);
				ls->thread_work = 0;
				ls->thread_stop = 1;
				drop_vg = 1;
				break;
			}

			if (act->op == LD_OP_STOP) {
				/* thread_stop is already set */
				ls->thread_work = 0;
				break;
			}

			if (act->op == LD_OP_FREE && act->rt == LD_RT_VG) {
				/* vgremove */
				log_debug("S %s checking for lockspace hosts", ls->name);
				rv = lm_hosts(ls, 1);
				if (rv) {
					/*
					 * Checking for hosts here in addition to after the
					 * main loop allows vgremove to fail and be rerun
					 * after the ls is stopped on other hosts.
					 */
					log_error("S %s lockspace hosts %d", ls->name, rv);
					list_del(&act->list);
					act->result = -EBUSY;
					add_client_result(act);
					continue;
				}
				ls->thread_work = 0;
				ls->thread_stop = 1;
				free_vg = 1;
				break;
			}

			if (act->op == LD_OP_BUSY && act->rt == LD_RT_VG) {
				log_debug("S %s checking if lockspace is busy", ls->name);
				rv = lm_hosts(ls, 0);
				if (rv)
					act->result = -EBUSY;
				else
					act->result = 0;
				list_del(&act->list);
				add_client_result(act);
				continue;
			}

			if (act->op == LD_OP_RENAME_BEFORE && act->rt == LD_RT_VG) {
				/* vgrename */
				log_debug("S %s checking for lockspace hosts", ls->name);
				rv = lm_hosts(ls, 1);
				if (rv) {
					log_error("S %s lockspace hosts %d", ls->name, rv);
					list_del(&act->list);
					act->result = -EBUSY;
					add_client_result(act);
					continue;
				}
				ls->thread_work = 0;
				ls->thread_stop = 1;
				/* Do we want to check hosts again below like vgremove? */
				break;
			}

			if (act->op == LD_OP_FIND_FREE_LOCK && act->rt == LD_RT_VG) {
				uint64_t free_offset = 0;
				log_debug("S %s find free lock", ls->name);
				rv = lm_find_free_lock(ls, &free_offset);
				log_debug("S %s find free lock %d offset %llu",
					  ls->name, rv, (unsigned long long)free_offset);
				ls->free_lock_offset = free_offset;
				list_del(&act->list);
				act->result = rv;
				add_client_result(act);
				continue;
			}

			list_del(&act->list);

			/* applies to all resources */
			if (act->op == LD_OP_CLOSE) {
				list_add(&act->list, &act_close);
				continue;
			}

			/*
			 * All the other op's are for locking.
			 * Find the specific resource that the lock op is for,
			 * and add the act to the resource's list of lock ops.
			 *
			 * (This creates a new resource if the one named in
			 * the act is not found.)
			 */
			r = find_resource_act(ls, act, (act->op == LD_OP_FREE) ? 1 : 0);
			if (!r) {
				act->result = (act->op == LD_OP_FREE) ? -ENOENT : -ENOMEM;
				add_client_result(act);
				continue;
			}

			list_add_tail(&act->list, &r->actions);

			log_debug("S %s R %s action %s %s", ls->name, r->name,
				  op_str(act->op), mode_str(act->mode));
		}
		pthread_mutex_unlock(&ls->mutex);

		/*
		 * Process the lock operations that have been queued for each
		 * resource.
		 */

		retry = 0;

		list_for_each_entry_safe(r, r2, &ls->resources, list)
			res_process(ls, r, &act_close, &retry);

		list_for_each_entry_safe(act, safe, &act_close, list) {
			list_del(&act->list);
			free_action(act);
		}

		if (retry) {
			ls->thread_work = 1;
			usleep(LOCK_RETRY_MS * 1000);
		}
	}

out_rem:
	log_debug("S %s stopping", ls->name);

	/*
	 * For sanlock, we need to unlock any existing locks
	 * before removing the lockspace, otherwise the sanlock
	 * daemon will kill us when the lockspace goes away.
	 * For dlm, we leave with force, so all locks will
	 * automatically be dropped when we leave the lockspace,
	 * so unlocking all before leaving could be skipped.
	 *
	 * Blindly dropping all existing locks must only be
	 * allowed in emergency/force situations, otherwise it's
	 * obviously dangerous, since the lock holders are still
	 * operating under the assumption that they hold the lock.
	 * drop_vg drops all existing locks, but should only
	 * happen when the VG access has been forcibly and
	 * successfully terminated.
	 *
	 * For vgremove of a sanlock vg, the vg lock will be held,
	 * and possibly the gl lock if this vg holds the gl.
	 * sanlock vgremove wants to unlock-rename these locks.
	 */

	log_debug("S %s clearing locks", ls->name);

	/* NOTE(review): the cleared-lock count returned here is unused;
	 * rv is reassigned below. */
	rv = clear_locks(ls, free_vg, drop_vg);

	/*
	 * Tell any other hosts in the lockspace to leave it
	 * before we remove it (for vgremove).  We do this
	 * before leaving the lockspace ourself because we
	 * need to be in the lockspace to see others.
	 */

	if (free_vg) {
		log_debug("S %s checking for lockspace hosts", ls->name);
		rv = lm_hosts(ls, 1);
		if (rv)
			log_error("S %s other lockspace hosts %d", ls->name, rv);
	}

	/*
	 * Leave the lockspace.
	 */

	rv = lm_rem_lockspace(ls, NULL, free_vg);

	log_debug("S %s rem_lockspace done %d", ls->name, rv);

out_act:
	/*
	 * Move remaining actions to results; this will usually (always?)
	 * be only the stop action.
	 */
	INIT_LIST_HEAD(&tmp_act);

	pthread_mutex_lock(&ls->mutex);
	list_for_each_entry_safe(act, safe, &ls->actions, list) {
		if (act->op == LD_OP_FREE) {
			act_op_free = act;
			act->result = 0;
		} else if (act->op == LD_OP_STOP)
			act->result = 0;
		else if (act->op == LD_OP_DROP_VG)
			act->result = 0;
		else if (act->op == LD_OP_RENAME_BEFORE)
			act->result = 0;
		else
			act->result = -ENOLS;
		list_del(&act->list);
		list_add_tail(&act->list, &tmp_act);
	}
	pthread_mutex_unlock(&ls->mutex);

	/*
	 * If this freed a sanlock vg that had gl enabled, and other sanlock
	 * vgs exist, return a flag so the command can warn that the gl has
	 * been removed and may need to be enabled in another sanlock vg.
	 */

	if (free_vg && ls->sanlock_gl_enabled && act_op_free) {
		pthread_mutex_lock(&lockspaces_mutex);
		if (other_sanlock_vgs_exist(ls))
			act_op_free->flags |= LD_AF_WARN_GL_REMOVED;
		pthread_mutex_unlock(&lockspaces_mutex);
	}

	/* hand the completed actions to the client thread */
	pthread_mutex_lock(&client_mutex);
	list_for_each_entry_safe(act, safe, &tmp_act, list) {
		list_del(&act->list);
		list_add_tail(&act->list, &client_results);
	}
	pthread_cond_signal(&client_cond);
	pthread_mutex_unlock(&client_mutex);

	/* after thread_done is set, ls may only be touched under lockspaces_mutex */
	pthread_mutex_lock(&lockspaces_mutex);
	ls->thread_done = 1;
	ls->free_vg = free_vg;
	ls->drop_vg = drop_vg;
	if (ls->lm_type == LD_LM_DLM && !strcmp(ls->name, gl_lsname_dlm))
		global_dlm_lockspace_exists = 0;
	pthread_mutex_unlock(&lockspaces_mutex);

	/* worker_thread will join this thread, and free the ls */
	pthread_mutex_lock(&worker_mutex);
	worker_wake = 1;
	pthread_cond_signal(&worker_cond);
	pthread_mutex_unlock(&worker_mutex);

	return NULL;
}
/*
 * Return 1 when no lockspaces remain, 0 otherwise.
 * Reads the lockspaces list under lockspaces_mutex.
 */
int lockspaces_empty(void)
{
	int empty;

	pthread_mutex_lock(&lockspaces_mutex);
	empty = list_empty(&lockspaces);
	pthread_mutex_unlock(&lockspaces_mutex);

	return empty;
}
/*
 * Caller must hold lockspaces_mutex.
 *
 * Returns the lockspace whose name matches ls_name, or NULL.
 *
 * When duplicate sanlock global locks have been seen
 * (sanlock_gl_dup is set), this function also counts how many
 * lockspaces still have the gl enabled, and clears sanlock_gl_dup
 * as a side effect once fewer than two remain.
 */
static struct lockspace *find_lockspace_name(char *ls_name)
{
	struct lockspace *match = NULL;
	struct lockspace *ls;
	int gl_enabled_count = 0;

	list_for_each_entry(ls, &lockspaces, list) {
		if (!strcmp(ls->name, ls_name))
			match = ls;

		if (!sanlock_gl_dup) {
			/* No duplicates to count: stop as soon as we match. */
			if (match)
				return match;
			continue;
		}

		/* Duplicates seen: scan the whole list to count gl-enabled ls's. */
		if (ls->sanlock_gl_enabled)
			gl_enabled_count++;
	}

	/* this is the side effect we want from this function */
	if (sanlock_gl_dup && gl_enabled_count < 2)
		sanlock_gl_dup = 0;

	return match;
}
/*
 * Build the lockspace name "lvm_<vg_name>" into ls_name.
 *
 * Returns 0 on success, -1 when the combined name would exceed
 * MAX_NAME (64).  For sanlock vgs, the name is shortened further
 * to 48 in the sanlock code.
 */
static int vg_ls_name(const char *vg_name, char *ls_name)
{
	/* 4 is strlen(LVM_LS_PREFIX) ("lvm_"). */
	if (strlen(vg_name) + 4 > MAX_NAME) {
		log_error("vg name too long %s", vg_name);
		return -1;
	}

	/*
	 * Caller buffers are char ls_name[MAX_NAME+1].  Using a size of
	 * MAX_NAME here would truncate the final character of a
	 * maximal-length (MAX_NAME chars) name, so pass the full
	 * buffer size; snprintf always NUL-terminates.
	 */
	snprintf(ls_name, MAX_NAME + 1, "%s%s", LVM_LS_PREFIX, vg_name);
	return 0;
}
/* FIXME: add mutex for gl_lsname_ ? */

/*
 * Copy into ls_name the name of the lockspace that holds the global
 * lock, or zero it when no lock manager is designated for the gl.
 */
static void gl_ls_name(char *ls_name)
{
	if (gl_use_dlm) {
		memcpy(ls_name, gl_lsname_dlm, MAX_NAME);
		return;
	}

	if (gl_use_sanlock) {
		memcpy(ls_name, gl_lsname_sanlock, MAX_NAME);
		return;
	}

	memset(ls_name, 0, MAX_NAME);
}
/*
* When this function returns an error, the caller needs to deal
* with act (in the cases where act exists).
*/
static int add_lockspace_thread(const char *ls_name,
const char *vg_name,
const char *vg_uuid,
int lm_type, const char *vg_args,
struct action *act)
{
struct lockspace *ls, *ls2;
struct resource *r;
int rv;
log_debug("add_lockspace_thread %s %s version %u",
lm_str(lm_type), ls_name, act ? act->version : 0);
if (!(ls = alloc_lockspace()))
return -ENOMEM;
strncpy(ls->name, ls_name, MAX_NAME);
ls->lm_type = lm_type;
if (act)
ls->start_client_id = act->client_id;
if (vg_uuid)
strncpy(ls->vg_uuid, vg_uuid, 64);
if (vg_name)
strncpy(ls->vg_name, vg_name, MAX_NAME);
if (vg_args)
strncpy(ls->vg_args, vg_args, MAX_ARGS);
if (act)
ls->host_id = act->host_id;
if (!(r = alloc_resource())) {
free(ls);
return -ENOMEM;
}
r->type = LD_RT_VG;
r->mode = LD_LK_UN;
r->use_vb = 1;
strncpy(r->name, R_NAME_VG, MAX_NAME);
list_add_tail(&r->list, &ls->resources);
pthread_mutex_lock(&lockspaces_mutex);
ls2 = find_lockspace_name(ls->name);
if (ls2) {
if (ls2->thread_stop) {
log_debug("add_lockspace_thread %s exists and stopping", ls->name);
rv = -EAGAIN;
} else {
log_debug("add_lockspace_thread %s exists", ls->name);
rv = -EEXIST;
}
pthread_mutex_unlock(&lockspaces_mutex);
free_resource(r);
free(ls);
return rv;
}
/*
* act will be null when this lockspace is added automatically/internally
* and not by an explicit client action that wants a result.
*/
if (act)
list_add(&act->list, &ls->actions);
if (ls->lm_type == LD_LM_DLM && !strcmp(ls->name, gl_lsname_dlm))
global_dlm_lockspace_exists = 1;
list_add_tail(&ls->list, &lockspaces);
pthread_mutex_unlock(&lockspaces_mutex);
rv = pthread_create(&ls->thread, NULL, lockspace_thread_main, ls);
if (rv < 0) {
log_error("add_lockspace_thread %s pthread error %d %d", ls->name, rv, errno);
pthread_mutex_lock(&lockspaces_mutex);
list_del(&ls->list);
pthread_mutex_unlock(&lockspaces_mutex);
free_resource(r);
free(ls);
return rv;
}
return 0;
}
/*
 * There is no add_sanlock_global_lockspace or
 * rem_sanlock_global_lockspace because with sanlock,
 * the global lockspace is one of the vg lockspaces.
 */

static int add_dlm_global_lockspace(struct action *act)
{
	int rv;

	/* Nothing to do if the dlm gl lockspace is already running. */
	if (global_dlm_lockspace_exists)
		return 0;

	/*
	 * FIXME: if the dlm global lockspace is started without a global
	 * lock request, insert an internal gl sh lock request?
	 */

	rv = add_lockspace_thread(gl_lsname_dlm, NULL, NULL, LD_LM_DLM, NULL, act);
	if (rv >= 0)
		return rv;

	/*
	 * EAGAIN may be returned for a short period because
	 * global_dlm_lockspace_exists is set to 0 before the
	 * ls is removed from the lockspaces list by the
	 * worker_thread.
	 */
	log_debug("add_dlm_global_lockspace add_lockspace_thread %d", rv);
	return rv;
}
/*
 * If dlm gl lockspace is the only one left, then stop it.
 * This is not used for an explicit rem_lockspace action from
 * the client, only for auto remove.
 */

static int rem_dlm_global_lockspace(void)
{
	struct lockspace *ls;
	struct lockspace *gl_ls = NULL;
	int other_running = 0;
	int rv;

	pthread_mutex_lock(&lockspaces_mutex);

	list_for_each_entry(ls, &lockspaces, list) {
		if (!strcmp(ls->name, gl_lsname_dlm)) {
			gl_ls = ls;
		} else if (!ls->thread_stop) {
			/* A non-gl lockspace is still active; keep the gl. */
			other_running = 1;
			break;
		}
	}

	if (other_running) {
		rv = -EAGAIN;
		goto out;
	}

	if (!gl_ls) {
		rv = -ENOENT;
		goto out;
	}

	/* Tell the gl lockspace thread to stop itself. */
	pthread_mutex_lock(&gl_ls->mutex);
	gl_ls->thread_stop = 1;
	gl_ls->thread_work = 1;
	pthread_cond_signal(&gl_ls->cond);
	pthread_mutex_unlock(&gl_ls->mutex);
	rv = 0;
out:
	pthread_mutex_unlock(&lockspaces_mutex);
	return rv;
}
/*
 * When the first dlm lockspace is added for a vg, automatically add a separate
 * dlm lockspace for the global lock.
 *
 * For sanlock, a separate lockspace is not used for the global lock, but the
 * gl lock lives in a vg lockspace, (although it's recommended to create a
 * special vg dedicated to holding the gl).
 *
 * Returns 0 on success or a negative error for the client action.
 */

static int add_lockspace(struct action *act)
{
	char ls_name[MAX_NAME+1];
	int rv;

	memset(ls_name, 0, sizeof(ls_name));

	/*
	 * FIXME: I don't think this is used any more.
	 * Remove it, or add the ability to start the global
	 * dlm lockspace using lvmlockctl?
	 */
	if (act->rt == LD_RT_GL) {
		if (gl_use_dlm) {
			rv = add_dlm_global_lockspace(act);
			return rv;
		} else {
			return -EINVAL;
		}
	}

	if (act->rt == LD_RT_VG) {
		/* Best-effort; failure is reported again at lock time. */
		if (gl_use_dlm)
			add_dlm_global_lockspace(NULL);

		/*
		 * The result was previously ignored, which would attempt to
		 * start a lockspace with an empty name when the vg name was
		 * too long; fail the action instead.
		 */
		if (vg_ls_name(act->vg_name, ls_name) < 0)
			return -EINVAL;

		rv = add_lockspace_thread(ls_name, act->vg_name, act->vg_uuid,
					  act->lm_type, act->vg_args,
					  act);
		if (rv)
			log_debug("add_lockspace %s add_lockspace_thread %d", ls_name, rv);
		return rv;
	}

	log_error("add_lockspace bad type %d", act->rt);
	return -1;
}
/*
 * vgchange --lock-stop vgname will lock the vg ex, then send a stop,
 * so we expect to find the ex vg lock held here, and will automatically
 * unlock it when stopping.
 *
 * Should we attempt to stop the lockspace containing the gl last?
 */
static int rem_lockspace(struct action *act)
{
	struct lockspace *ls;
	char ls_name[MAX_NAME+1];
	int force = act->flags & LD_AF_FORCE;
	int rt = act->rt;

	/* The gl lockspace is only a separate lockspace for dlm. */
	if (act->rt == LD_RT_GL && act->lm_type != LD_LM_DLM)
		return -EINVAL;

	memset(ls_name, 0, sizeof(ls_name));

	if (act->rt == LD_RT_GL) {
		gl_ls_name(ls_name);
	} else {
		/*
		 * The result was previously ignored, which would search for
		 * a lockspace with an empty name when the vg name was too
		 * long; fail the action instead.
		 */
		if (vg_ls_name(act->vg_name, ls_name) < 0)
			return -EINVAL;
	}

	pthread_mutex_lock(&lockspaces_mutex);
	ls = find_lockspace_name(ls_name);
	if (!ls) {
		pthread_mutex_unlock(&lockspaces_mutex);
		return -ENOLS;
	}

	pthread_mutex_lock(&ls->mutex);
	/* Already on its way down. */
	if (ls->thread_stop) {
		pthread_mutex_unlock(&ls->mutex);
		pthread_mutex_unlock(&lockspaces_mutex);
		return -ESTALE;
	}

	/* Refuse to stop a lockspace with active lv locks unless forced. */
	if (!force && for_each_lock(ls, LOCKS_EXIST_LV)) {
		pthread_mutex_unlock(&ls->mutex);
		pthread_mutex_unlock(&lockspaces_mutex);
		return -EBUSY;
	}

	/* Queue the act on the ls; the lockspace thread completes it. */
	ls->thread_work = 1;
	ls->thread_stop = 1;
	list_add_tail(&act->list, &ls->actions);
	pthread_cond_signal(&ls->cond);
	pthread_mutex_unlock(&ls->mutex);
	pthread_mutex_unlock(&lockspaces_mutex);

	/*
	 * The dlm global lockspace was automatically added when
	 * the first dlm vg lockspace was added, now reverse that
	 * by automatically removing the dlm global lockspace when
	 * the last dlm vg lockspace is removed.
	 */
	if (rt == LD_RT_VG && gl_use_dlm)
		rem_dlm_global_lockspace();

	return 0;
}
/*
 * count how many lockspaces started by this client are still starting;
 * the client will use this to wait for all its start operations to finish
 * (START_WAIT).
 */

static int count_lockspace_starting(uint32_t client_id)
{
	struct lockspace *ls;
	int starting = 0;
	int done = 0;
	int fail = 0;

	pthread_mutex_lock(&lockspaces_mutex);
	list_for_each_entry(ls, &lockspaces, list) {
		if (ls->start_client_id != client_id)
			continue;

		if (ls->create_done)
			done++;
		if (ls->create_fail)
			fail++;
		/* Neither done nor failed means it is still starting. */
		if (!ls->create_done && !ls->create_fail)
			starting++;
	}
	pthread_mutex_unlock(&lockspaces_mutex);

	log_debug("count_lockspace_starting client %u count %d done %d fail %d",
		  client_id, starting, done, fail);

	return starting;
}
/*
 * Loop through all lockspaces, and:
 * - if do_stop is set, stop any that are not stopped
 * - if do_free is set, join any that are done stopping (and free ls)
 *
 * do_stop will not stop an ls with lv locks unless force is set.
 *
 * This function does not block or wait for anything.
 *
 * do_stop (no do_free):
 * returns count of lockspaces that need stop (have locks and no force)
 *
 * do_free (no do_stop):
 * returns count of lockspaces that are stopped and need freeing
 *
 * do_stop and do_free:
 * returns sum of the previous two
 */

static int for_each_lockspace(int do_stop, int do_free, int do_force)
{
	struct lockspace *ls, *safe;
	int need_stop = 0;
	int need_free = 0;
	int stop_count = 0;
	int free_count = 0;
	int done;
	int stop;

	pthread_mutex_lock(&lockspaces_mutex);

	if (do_stop) {
		list_for_each_entry(ls, &lockspaces, list) {
			pthread_mutex_lock(&ls->mutex);
			if (ls->thread_stop) {
				/* Already stopping; nothing to do. */
				pthread_mutex_unlock(&ls->mutex);
				continue;
			}

			if (!do_force && for_each_lock(ls, LOCKS_EXIST_ANY)) {
				/* Locks still held and not forced. */
				need_stop++;
			} else {
				ls->thread_work = 1;
				ls->thread_stop = 1;
				pthread_cond_signal(&ls->cond);
				stop_count++;
			}
			pthread_mutex_unlock(&ls->mutex);
		}
	}

	if (do_free) {
		list_for_each_entry_safe(ls, safe, &lockspaces, list) {

			pthread_mutex_lock(&ls->mutex);
			done = ls->thread_done;
			stop = ls->thread_stop;
			pthread_mutex_unlock(&ls->mutex);

			/* This ls has locks and force is not set. */
			if (!stop)
				continue;

			/*
			 * Once thread_done is set, we know that the lockspace_thread
			 * will not be using/touching the ls struct.  Any other
			 * thread touches the ls struct under lockspaces_mutex.
			 */
			if (done) {
				pthread_join(ls->thread, NULL);
				list_del(&ls->list);
				/*
				 * Previously the ls was only freed when
				 * ls->free_vg was set, which leaked the joined,
				 * unlinked ls (and its resources) on every plain
				 * lockspace stop; free it unconditionally.
				 * In future we may need to free ls->actions here.
				 */
				free_ls_resources(ls);
				free(ls);
				free_count++;
			} else {
				/* Stopping but the thread has not finished yet. */
				need_free++;
			}
		}
	}

	if (list_empty(&lockspaces)) {
		/* With no lockspaces left, forget the lock manager choice
		   unless it was fixed by configuration. */
		if (!gl_type_static) {
			gl_use_dlm = 0;
			gl_use_sanlock = 0;
		}
	}
	pthread_mutex_unlock(&lockspaces_mutex);

	if (stop_count || free_count || need_stop || need_free) {
		log_debug("for_each_lockspace do_stop %d do_free %d "
			  "stop_count %d free_count %d need_stop %d need_free %d",
			  do_stop, do_free, stop_count, free_count, need_stop, need_free);
	}

	return need_stop + need_free;
}
/*
 * This is only called when the daemon is exiting so the sleep/retry
 * loop doesn't have any adverse impact.
 */

static void for_each_lockspace_retry(int do_stop, int do_free, int do_force)
{
	int remaining;

	for (;;) {
		remaining = for_each_lockspace(do_stop, do_free, do_force);
		if (!remaining)
			return;

		log_debug("for_each_lockspace_retry remaining %d", remaining);
		sleep(1);
	}
}
static int work_init_vg(struct action *act)
{
struct lockspace *ls;
char ls_name[MAX_NAME+1];
int rv = 0;
memset(ls_name, 0, sizeof(ls_name));
vg_ls_name(act->vg_name, ls_name);
/*
* The max dlm ls name is 64 and the max sanlock ls name is 48. So,
* after the "lvm_" prefix, only the first 60/44 characters of the VG
* name are used for the lockspace name. This will cause a collision
* in the lock manager if two different VG names have the first 60/44
* chars in common. At the time of vgcreate (here), check if any other
* VG's are known that would collide. If the collision is not detected
* at vgcreate time, it will be detected at start time and add_lockspace
* will fail for the second of the two matching ls names.
*/
pthread_mutex_lock(&lockspaces_mutex);
list_for_each_entry(ls, &lockspaces, list) {
if ((ls->lm_type == LD_LM_SANLOCK) && !strncmp(ls->name, ls_name, 48)) {
rv = -EEXIST;
break;
}
if ((ls->lm_type == LD_LM_DLM) && !strcmp(ls->name, ls_name)) {
rv = -EEXIST;
break;
}
}
pthread_mutex_unlock(&lockspaces_mutex);
if (rv == -EEXIST) {
log_error("Existing lockspace name %s matches new %s VG names %s %s",
ls->name, ls_name, ls->vg_name, act->vg_name);
return rv;