1/*
2 * Copyright (C) 2006-2010 Red Hat, Inc.  All rights reserved.
3 *
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License v.2.
7 */
8
9#include <linux/miscdevice.h>
10#include <linux/init.h>
11#include <linux/wait.h>
12#include <linux/module.h>
13#include <linux/file.h>
14#include <linux/fs.h>
15#include <linux/poll.h>
16#include <linux/signal.h>
17#include <linux/spinlock.h>
18#include <linux/dlm.h>
19#include <linux/dlm_device.h>
20#include <linux/slab.h>
21
22#include "dlm_internal.h"
23#include "lockspace.h"
24#include "lock.h"
25#include "lvb_table.h"
26#include "user.h"
27#include "ast.h"
28
/* prefix used when building the name of each lockspace misc device */
static const char name_prefix[] = "dlm";
/* forward declaration; the initializer follows the device_* handlers */
static const struct file_operations device_fops;
/* incremented on each open of the monitor device, decremented on close */
static atomic_t dlm_monitor_opened;
/* stays 1 until the monitor device has been opened for the first time */
static int dlm_monitor_unused = 1;
33
34#ifdef CONFIG_COMPAT
35
/*
 * Lock parameters as laid out in a request whose is64bit field is zero.
 * The members that compat_input() widens into pointers (castparam,
 * castaddr, bastparam, bastaddr, lksb) are carried here as __u32.
 */
struct dlm_lock_params32 {
	__u8 mode;
	__u8 namelen;
	__u16 unused;
	__u32 flags;
	__u32 lkid;
	__u32 parent;
	__u64 xid;
	__u64 timeout;
	__u32 castparam;
	__u32 castaddr;
	__u32 bastparam;
	__u32 bastaddr;
	__u32 lksb;
	char lvb[DLM_USER_LVB_LEN];
	char name[0];	/* name bytes follow the fixed part of the request */
};
53
/*
 * Write request as laid out when is64bit is zero.  device_write()
 * reinterprets the copied user buffer as this type and converts it to a
 * struct dlm_write_request with compat_input().
 */
struct dlm_write_request32 {
	__u32 version[3];
	__u8 cmd;
	__u8 is64bit;
	__u8 unused[2];

	/* which member is meaningful depends on cmd (see compat_input()) */
	union  {
		struct dlm_lock_params32 lock;
		struct dlm_lspace_params lspace;
		struct dlm_purge_params purge;
	} i;
};
66
/*
 * Lock status block as embedded in struct dlm_lock_result32.
 * compat_output() stores the sb_lvbptr pointer here truncated to __u32.
 */
struct dlm_lksb32 {
	__u32 sb_status;
	__u32 sb_lkid;
	__u8 sb_flags;
	__u32 sb_lvbptr;
};
73
/*
 * Result record copied to userspace by copy_result_to_user() when the
 * compat argument is set; filled in from a struct dlm_lock_result by
 * compat_output(), with the user pointers stored as __u32.
 */
struct dlm_lock_result32 {
	__u32 version[3];
	__u32 length;
	__u32 user_astaddr;
	__u32 user_astparam;
	__u32 user_lksb;
	struct dlm_lksb32 lksb;
	__u8 bast_mode;
	__u8 unused[3];
	/* Offsets may be zero if no data is present */
	__u32 lvb_offset;
};
86
87static void compat_input(struct dlm_write_request *kb,
88			 struct dlm_write_request32 *kb32,
89			 int namelen)
90{
91	kb->version[0] = kb32->version[0];
92	kb->version[1] = kb32->version[1];
93	kb->version[2] = kb32->version[2];
94
95	kb->cmd = kb32->cmd;
96	kb->is64bit = kb32->is64bit;
97	if (kb->cmd == DLM_USER_CREATE_LOCKSPACE ||
98	    kb->cmd == DLM_USER_REMOVE_LOCKSPACE) {
99		kb->i.lspace.flags = kb32->i.lspace.flags;
100		kb->i.lspace.minor = kb32->i.lspace.minor;
101		memcpy(kb->i.lspace.name, kb32->i.lspace.name, namelen);
102	} else if (kb->cmd == DLM_USER_PURGE) {
103		kb->i.purge.nodeid = kb32->i.purge.nodeid;
104		kb->i.purge.pid = kb32->i.purge.pid;
105	} else {
106		kb->i.lock.mode = kb32->i.lock.mode;
107		kb->i.lock.namelen = kb32->i.lock.namelen;
108		kb->i.lock.flags = kb32->i.lock.flags;
109		kb->i.lock.lkid = kb32->i.lock.lkid;
110		kb->i.lock.parent = kb32->i.lock.parent;
111		kb->i.lock.xid = kb32->i.lock.xid;
112		kb->i.lock.timeout = kb32->i.lock.timeout;
113		kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
114		kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
115		kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
116		kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr;
117		kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb;
118		memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN);
119		memcpy(kb->i.lock.name, kb32->i.lock.name, namelen);
120	}
121}
122
123static void compat_output(struct dlm_lock_result *res,
124			  struct dlm_lock_result32 *res32)
125{
126	res32->version[0] = res->version[0];
127	res32->version[1] = res->version[1];
128	res32->version[2] = res->version[2];
129
130	res32->user_astaddr = (__u32)(long)res->user_astaddr;
131	res32->user_astparam = (__u32)(long)res->user_astparam;
132	res32->user_lksb = (__u32)(long)res->user_lksb;
133	res32->bast_mode = res->bast_mode;
134
135	res32->lvb_offset = res->lvb_offset;
136	res32->length = res->length;
137
138	res32->lksb.sb_status = res->lksb.sb_status;
139	res32->lksb.sb_flags = res->lksb.sb_flags;
140	res32->lksb.sb_lkid = res->lksb.sb_lkid;
141	res32->lksb.sb_lvbptr = (__u32)(long)res->lksb.sb_lvbptr;
142}
143#endif
144
145/* Figure out if this lock is at the end of its life and no longer
146   available for the application to use.  The lkb still exists until
147   the final ast is read.  A lock becomes EOL in three situations:
148     1. a noqueue request fails with EAGAIN
149     2. an unlock completes with EUNLOCK
150     3. a cancel of a waiting request completes with ECANCEL/EDEADLK
151   An EOL lock needs to be removed from the process's list of locks.
152   And we can't allow any new operation on an EOL lock.  This is
153   not related to the lifetime of the lkb struct which is managed
154   entirely by refcount. */
155
156static int lkb_is_endoflife(int mode, int status)
157{
158	switch (status) {
159	case -DLM_EUNLOCK:
160		return 1;
161	case -DLM_ECANCEL:
162	case -ETIMEDOUT:
163	case -EDEADLK:
164	case -EAGAIN:
165		if (mode == DLM_LOCK_IV)
166			return 1;
167		break;
168	}
169	return 0;
170}
171
/* we could possibly check if the cancel of an orphan has resulted in the lkb
   being removed and then remove that lkb from the orphans list and free it */

/*
 * Queue a callback (completion or blocking ast, per flags) for the process
 * that owns lkb, and wake any reader waiting on the process's wait queue.
 *
 * Nothing is queued when the lkb is flagged ORPHAN or DEAD, when a blocking
 * ast is requested but the user supplied no bastaddr, or when
 * dlm_add_lkb_callback() fails.  A completion ast that ends the lock's life
 * (see lkb_is_endoflife()) also removes the lkb from the owner's lock list.
 */
void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
		      int status, uint32_t sbflags, uint64_t seq)
{
	struct dlm_ls *ls;
	struct dlm_user_args *ua;
	struct dlm_user_proc *proc;
	int rv;

	/* cheap unlocked check first; repeated below under the mutex */
	if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
		return;

	ls = lkb->lkb_resource->res_ls;
	mutex_lock(&ls->ls_clear_proc_locks);

	/* If ORPHAN/DEAD flag is set, it means the process is dead so an ast
	   can't be delivered.  For ORPHAN's, dlm_clear_proc_locks() freed
	   lkb->ua so we can't try to use it.  This second check is necessary
	   for cases where a completion ast is received for an operation that
	   began before clear_proc_locks did its cancel/unlock. */

	if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
		goto out;

	DLM_ASSERT(lkb->lkb_ua, dlm_print_lkb(lkb););
	ua = lkb->lkb_ua;
	proc = ua->proc;

	/* the user did not register a blocking ast, so drop it */
	if ((flags & DLM_CB_BAST) && ua->bastaddr == NULL)
		goto out;

	if ((flags & DLM_CB_CAST) && lkb_is_endoflife(mode, status))
		lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;

	spin_lock(&proc->asts_spin);

	rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, seq);
	if (rv < 0) {
		spin_unlock(&proc->asts_spin);
		goto out;
	}

	/* an empty lkb_cb_list means the lkb is not yet on proc->asts: take
	   a reference for the list, queue it and wake the reader */
	if (list_empty(&lkb->lkb_cb_list)) {
		kref_get(&lkb->lkb_ref);
		list_add_tail(&lkb->lkb_cb_list, &proc->asts);
		wake_up_interruptible(&proc->wait);
	}
	spin_unlock(&proc->asts_spin);

	if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) {
		/* N.B. spin_lock locks_spin, not asts_spin */
		spin_lock(&proc->locks_spin);
		/* drop the lkb from the owner's list along with a reference */
		if (!list_empty(&lkb->lkb_ownqueue)) {
			list_del_init(&lkb->lkb_ownqueue);
			dlm_put_lkb(lkb);
		}
		spin_unlock(&proc->locks_spin);
	}
 out:
	mutex_unlock(&ls->ls_clear_proc_locks);
}
235
236static int device_user_lock(struct dlm_user_proc *proc,
237			    struct dlm_lock_params *params)
238{
239	struct dlm_ls *ls;
240	struct dlm_user_args *ua;
241	uint32_t lkid;
242	int error = -ENOMEM;
243
244	ls = dlm_find_lockspace_local(proc->lockspace);
245	if (!ls)
246		return -ENOENT;
247
248	if (!params->castaddr || !params->lksb) {
249		error = -EINVAL;
250		goto out;
251	}
252
253	ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
254	if (!ua)
255		goto out;
256	ua->proc = proc;
257	ua->user_lksb = params->lksb;
258	ua->castparam = params->castparam;
259	ua->castaddr = params->castaddr;
260	ua->bastparam = params->bastparam;
261	ua->bastaddr = params->bastaddr;
262	ua->xid = params->xid;
263
264	if (params->flags & DLM_LKF_CONVERT) {
265		error = dlm_user_convert(ls, ua,
266				         params->mode, params->flags,
267				         params->lkid, params->lvb,
268					 (unsigned long) params->timeout);
269	} else if (params->flags & DLM_LKF_ORPHAN) {
270		error = dlm_user_adopt_orphan(ls, ua,
271					 params->mode, params->flags,
272					 params->name, params->namelen,
273					 (unsigned long) params->timeout,
274					 &lkid);
275		if (!error)
276			error = lkid;
277	} else {
278		error = dlm_user_request(ls, ua,
279					 params->mode, params->flags,
280					 params->name, params->namelen,
281					 (unsigned long) params->timeout);
282		if (!error)
283			error = ua->lksb.sb_lkid;
284	}
285 out:
286	dlm_put_lockspace(ls);
287	return error;
288}
289
290static int device_user_unlock(struct dlm_user_proc *proc,
291			      struct dlm_lock_params *params)
292{
293	struct dlm_ls *ls;
294	struct dlm_user_args *ua;
295	int error = -ENOMEM;
296
297	ls = dlm_find_lockspace_local(proc->lockspace);
298	if (!ls)
299		return -ENOENT;
300
301	ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
302	if (!ua)
303		goto out;
304	ua->proc = proc;
305	ua->user_lksb = params->lksb;
306	ua->castparam = params->castparam;
307	ua->castaddr = params->castaddr;
308
309	if (params->flags & DLM_LKF_CANCEL)
310		error = dlm_user_cancel(ls, ua, params->flags, params->lkid);
311	else
312		error = dlm_user_unlock(ls, ua, params->flags, params->lkid,
313					params->lvb);
314 out:
315	dlm_put_lockspace(ls);
316	return error;
317}
318
319static int device_user_deadlock(struct dlm_user_proc *proc,
320				struct dlm_lock_params *params)
321{
322	struct dlm_ls *ls;
323	int error;
324
325	ls = dlm_find_lockspace_local(proc->lockspace);
326	if (!ls)
327		return -ENOENT;
328
329	error = dlm_user_deadlock(ls, params->flags, params->lkid);
330
331	dlm_put_lockspace(ls);
332	return error;
333}
334
335static int dlm_device_register(struct dlm_ls *ls, char *name)
336{
337	int error, len;
338
339	/* The device is already registered.  This happens when the
340	   lockspace is created multiple times from userspace. */
341	if (ls->ls_device.name)
342		return 0;
343
344	error = -ENOMEM;
345	len = strlen(name) + strlen(name_prefix) + 2;
346	ls->ls_device.name = kzalloc(len, GFP_NOFS);
347	if (!ls->ls_device.name)
348		goto fail;
349
350	snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix,
351		 name);
352	ls->ls_device.fops = &device_fops;
353	ls->ls_device.minor = MISC_DYNAMIC_MINOR;
354
355	error = misc_register(&ls->ls_device);
356	if (error) {
357		kfree(ls->ls_device.name);
358	}
359fail:
360	return error;
361}
362
363int dlm_device_deregister(struct dlm_ls *ls)
364{
365	/* The device is not registered.  This happens when the lockspace
366	   was never used from userspace, or when device_create_lockspace()
367	   calls dlm_release_lockspace() after the register fails. */
368	if (!ls->ls_device.name)
369		return 0;
370
371	misc_deregister(&ls->ls_device);
372	kfree(ls->ls_device.name);
373	return 0;
374}
375
376static int device_user_purge(struct dlm_user_proc *proc,
377			     struct dlm_purge_params *params)
378{
379	struct dlm_ls *ls;
380	int error;
381
382	ls = dlm_find_lockspace_local(proc->lockspace);
383	if (!ls)
384		return -ENOENT;
385
386	error = dlm_user_purge(ls, proc, params->nodeid, params->pid);
387
388	dlm_put_lockspace(ls);
389	return error;
390}
391
392static int device_create_lockspace(struct dlm_lspace_params *params)
393{
394	dlm_lockspace_t *lockspace;
395	struct dlm_ls *ls;
396	int error;
397
398	if (!capable(CAP_SYS_ADMIN))
399		return -EPERM;
400
401	error = dlm_new_lockspace(params->name, NULL, params->flags,
402				  DLM_USER_LVB_LEN, NULL, NULL, NULL,
403				  &lockspace);
404	if (error)
405		return error;
406
407	ls = dlm_find_lockspace_local(lockspace);
408	if (!ls)
409		return -ENOENT;
410
411	error = dlm_device_register(ls, params->name);
412	dlm_put_lockspace(ls);
413
414	if (error)
415		dlm_release_lockspace(lockspace, 0);
416	else
417		error = ls->ls_device.minor;
418
419	return error;
420}
421
422static int device_remove_lockspace(struct dlm_lspace_params *params)
423{
424	dlm_lockspace_t *lockspace;
425	struct dlm_ls *ls;
426	int error, force = 0;
427
428	if (!capable(CAP_SYS_ADMIN))
429		return -EPERM;
430
431	ls = dlm_find_lockspace_device(params->minor);
432	if (!ls)
433		return -ENOENT;
434
435	if (params->flags & DLM_USER_LSFLG_FORCEFREE)
436		force = 2;
437
438	lockspace = ls->ls_local_handle;
439	dlm_put_lockspace(ls);
440
441	/* The final dlm_release_lockspace waits for references to go to
442	   zero, so all processes will need to close their device for the
443	   ls before the release will proceed.  release also calls the
444	   device_deregister above.  Converting a positive return value
445	   from release to zero means that userspace won't know when its
446	   release was the final one, but it shouldn't need to know. */
447
448	error = dlm_release_lockspace(lockspace, force);
449	if (error > 0)
450		error = 0;
451	return error;
452}
453
454/* Check the user's version matches ours */
455static int check_version(struct dlm_write_request *req)
456{
457	if (req->version[0] != DLM_DEVICE_VERSION_MAJOR ||
458	    (req->version[0] == DLM_DEVICE_VERSION_MAJOR &&
459	     req->version[1] > DLM_DEVICE_VERSION_MINOR)) {
460
461		printk(KERN_DEBUG "dlm: process %s (%d) version mismatch "
462		       "user (%d.%d.%d) kernel (%d.%d.%d)\n",
463		       current->comm,
464		       task_pid_nr(current),
465		       req->version[0],
466		       req->version[1],
467		       req->version[2],
468		       DLM_DEVICE_VERSION_MAJOR,
469		       DLM_DEVICE_VERSION_MINOR,
470		       DLM_DEVICE_VERSION_PATCH);
471		return -EINVAL;
472	}
473	return 0;
474}
475
476/*
477 * device_write
478 *
479 *   device_user_lock
480 *     dlm_user_request -> request_lock
481 *     dlm_user_convert -> convert_lock
482 *
483 *   device_user_unlock
484 *     dlm_user_unlock -> unlock_lock
485 *     dlm_user_cancel -> cancel_lock
486 *
487 *   device_create_lockspace
488 *     dlm_new_lockspace
489 *
490 *   device_remove_lockspace
491 *     dlm_release_lockspace
492 */
493
494/* a write to a lockspace device is a lock or unlock request, a write
495   to the control device is to create/remove a lockspace */
496
497static ssize_t device_write(struct file *file, const char __user *buf,
498			    size_t count, loff_t *ppos)
499{
500	struct dlm_user_proc *proc = file->private_data;
501	struct dlm_write_request *kbuf;
502	int error;
503
504#ifdef CONFIG_COMPAT
505	if (count < sizeof(struct dlm_write_request32))
506#else
507	if (count < sizeof(struct dlm_write_request))
508#endif
509		return -EINVAL;
510
511	/*
512	 * can't compare against COMPAT/dlm_write_request32 because
513	 * we don't yet know if is64bit is zero
514	 */
515	if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
516		return -EINVAL;
517
518	kbuf = kzalloc(count + 1, GFP_NOFS);
519	if (!kbuf)
520		return -ENOMEM;
521
522	if (copy_from_user(kbuf, buf, count)) {
523		error = -EFAULT;
524		goto out_free;
525	}
526
527	if (check_version(kbuf)) {
528		error = -EBADE;
529		goto out_free;
530	}
531
532#ifdef CONFIG_COMPAT
533	if (!kbuf->is64bit) {
534		struct dlm_write_request32 *k32buf;
535		int namelen = 0;
536
537		if (count > sizeof(struct dlm_write_request32))
538			namelen = count - sizeof(struct dlm_write_request32);
539
540		k32buf = (struct dlm_write_request32 *)kbuf;
541
542		/* add 1 after namelen so that the name string is terminated */
543		kbuf = kzalloc(sizeof(struct dlm_write_request) + namelen + 1,
544			       GFP_NOFS);
545		if (!kbuf) {
546			kfree(k32buf);
547			return -ENOMEM;
548		}
549
550		if (proc)
551			set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags);
552
553		compat_input(kbuf, k32buf, namelen);
554		kfree(k32buf);
555	}
556#endif
557
558	/* do we really need this? can a write happen after a close? */
559	if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
560	    (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) {
561		error = -EINVAL;
562		goto out_free;
563	}
564
565	error = -EINVAL;
566
567	switch (kbuf->cmd)
568	{
569	case DLM_USER_LOCK:
570		if (!proc) {
571			log_print("no locking on control device");
572			goto out_free;
573		}
574		error = device_user_lock(proc, &kbuf->i.lock);
575		break;
576
577	case DLM_USER_UNLOCK:
578		if (!proc) {
579			log_print("no locking on control device");
580			goto out_free;
581		}
582		error = device_user_unlock(proc, &kbuf->i.lock);
583		break;
584
585	case DLM_USER_DEADLOCK:
586		if (!proc) {
587			log_print("no locking on control device");
588			goto out_free;
589		}
590		error = device_user_deadlock(proc, &kbuf->i.lock);
591		break;
592
593	case DLM_USER_CREATE_LOCKSPACE:
594		if (proc) {
595			log_print("create/remove only on control device");
596			goto out_free;
597		}
598		error = device_create_lockspace(&kbuf->i.lspace);
599		break;
600
601	case DLM_USER_REMOVE_LOCKSPACE:
602		if (proc) {
603			log_print("create/remove only on control device");
604			goto out_free;
605		}
606		error = device_remove_lockspace(&kbuf->i.lspace);
607		break;
608
609	case DLM_USER_PURGE:
610		if (!proc) {
611			log_print("no locking on control device");
612			goto out_free;
613		}
614		error = device_user_purge(proc, &kbuf->i.purge);
615		break;
616
617	default:
618		log_print("Unknown command passed to DLM device : %d\n",
619			  kbuf->cmd);
620	}
621
622 out_free:
623	kfree(kbuf);
624	return error;
625}
626
627/* Every process that opens the lockspace device has its own "proc" structure
628   hanging off the open file that's used to keep track of locks owned by the
629   process and asts that need to be delivered to the process. */
630
631static int device_open(struct inode *inode, struct file *file)
632{
633	struct dlm_user_proc *proc;
634	struct dlm_ls *ls;
635
636	ls = dlm_find_lockspace_device(iminor(inode));
637	if (!ls)
638		return -ENOENT;
639
640	proc = kzalloc(sizeof(struct dlm_user_proc), GFP_NOFS);
641	if (!proc) {
642		dlm_put_lockspace(ls);
643		return -ENOMEM;
644	}
645
646	proc->lockspace = ls->ls_local_handle;
647	INIT_LIST_HEAD(&proc->asts);
648	INIT_LIST_HEAD(&proc->locks);
649	INIT_LIST_HEAD(&proc->unlocking);
650	spin_lock_init(&proc->asts_spin);
651	spin_lock_init(&proc->locks_spin);
652	init_waitqueue_head(&proc->wait);
653	file->private_data = proc;
654
655	return 0;
656}
657
/*
 * Release handler for a lockspace device: marks the proc as closing,
 * clears the locks it owns, frees it and drops the lockspace references.
 * Returns 0, or -ENOENT if the lockspace can no longer be found.
 */
static int device_close(struct inode *inode, struct file *file)
{
	struct dlm_user_proc *proc = file->private_data;
	struct dlm_ls *ls;

	ls = dlm_find_lockspace_local(proc->lockspace);
	/* NOTE(review): proc is not freed on this path */
	if (!ls)
		return -ENOENT;

	/* device_read()/device_write() test this flag and refuse to run */
	set_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags);

	dlm_clear_proc_locks(ls, proc);

	/* at this point no more lkb's should exist for this lockspace,
	   so there's no chance of dlm_user_add_ast() being called and
	   looking for lkb->ua->proc */

	kfree(proc);
	file->private_data = NULL;

	/* first put pairs with the find at the top of this function */
	dlm_put_lockspace(ls);
	dlm_put_lockspace(ls);  /* for the find in device_open() */

	/* FIXME: AUTOFREE: if this ls is no longer used do
	   device_remove_lockspace() */

	return 0;
}
686
687static int copy_result_to_user(struct dlm_user_args *ua, int compat,
688			       uint32_t flags, int mode, int copy_lvb,
689			       char __user *buf, size_t count)
690{
691#ifdef CONFIG_COMPAT
692	struct dlm_lock_result32 result32;
693#endif
694	struct dlm_lock_result result;
695	void *resultptr;
696	int error=0;
697	int len;
698	int struct_len;
699
700	memset(&result, 0, sizeof(struct dlm_lock_result));
701	result.version[0] = DLM_DEVICE_VERSION_MAJOR;
702	result.version[1] = DLM_DEVICE_VERSION_MINOR;
703	result.version[2] = DLM_DEVICE_VERSION_PATCH;
704	memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
705	result.user_lksb = ua->user_lksb;
706
707	/* FIXME: dlm1 provides for the user's bastparam/addr to not be updated
708	   in a conversion unless the conversion is successful.  See code
709	   in dlm_user_convert() for updating ua from ua_tmp.  OpenVMS, though,
710	   notes that a new blocking AST address and parameter are set even if
711	   the conversion fails, so maybe we should just do that. */
712
713	if (flags & DLM_CB_BAST) {
714		result.user_astaddr = ua->bastaddr;
715		result.user_astparam = ua->bastparam;
716		result.bast_mode = mode;
717	} else {
718		result.user_astaddr = ua->castaddr;
719		result.user_astparam = ua->castparam;
720	}
721
722#ifdef CONFIG_COMPAT
723	if (compat)
724		len = sizeof(struct dlm_lock_result32);
725	else
726#endif
727		len = sizeof(struct dlm_lock_result);
728	struct_len = len;
729
730	/* copy lvb to userspace if there is one, it's been updated, and
731	   the user buffer has space for it */
732
733	if (copy_lvb && ua->lksb.sb_lvbptr && count >= len + DLM_USER_LVB_LEN) {
734		if (copy_to_user(buf+len, ua->lksb.sb_lvbptr,
735				 DLM_USER_LVB_LEN)) {
736			error = -EFAULT;
737			goto out;
738		}
739
740		result.lvb_offset = len;
741		len += DLM_USER_LVB_LEN;
742	}
743
744	result.length = len;
745	resultptr = &result;
746#ifdef CONFIG_COMPAT
747	if (compat) {
748		compat_output(&result, &result32);
749		resultptr = &result32;
750	}
751#endif
752
753	if (copy_to_user(buf, resultptr, struct_len))
754		error = -EFAULT;
755	else
756		error = len;
757 out:
758	return error;
759}
760
761static int copy_version_to_user(char __user *buf, size_t count)
762{
763	struct dlm_device_version ver;
764
765	memset(&ver, 0, sizeof(struct dlm_device_version));
766	ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
767	ver.version[1] = DLM_DEVICE_VERSION_MINOR;
768	ver.version[2] = DLM_DEVICE_VERSION_PATCH;
769
770	if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
771		return -EFAULT;
772	return sizeof(struct dlm_device_version);
773}
774
/* a read returns a single ast described in a struct dlm_lock_result

   A read of exactly sizeof(struct dlm_device_version) bytes returns the
   interface version instead, and is the only read accepted on the control
   device (where private_data is NULL).

   Unless the file is O_NONBLOCK the call sleeps until a callback is queued
   or a signal arrives.  Returns the byte count from copy_result_to_user(),
   or -EINVAL (bad size, control device, proc closing), -EAGAIN
   (nonblocking and nothing queued), -ERESTARTSYS (signal), -EFAULT. */

static ssize_t device_read(struct file *file, char __user *buf, size_t count,
			   loff_t *ppos)
{
	struct dlm_user_proc *proc = file->private_data;
	struct dlm_lkb *lkb;
	DECLARE_WAITQUEUE(wait, current);
	struct dlm_callback cb;
	int rv, resid, copy_lvb = 0;
	int old_mode, new_mode;

	/* version query: recognised purely by the requested size */
	if (count == sizeof(struct dlm_device_version)) {
		rv = copy_version_to_user(buf, count);
		return rv;
	}

	if (!proc) {
		log_print("non-version read from control device %zu", count);
		return -EINVAL;
	}

	/* the buffer must hold at least the smallest possible result record */
#ifdef CONFIG_COMPAT
	if (count < sizeof(struct dlm_lock_result32))
#else
	if (count < sizeof(struct dlm_lock_result))
#endif
		return -EINVAL;

 try_another:

	/* do we really need this? can a read happen after a close? */
	if (test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
		return -EINVAL;

	spin_lock(&proc->asts_spin);
	if (list_empty(&proc->asts)) {
		if (file->f_flags & O_NONBLOCK) {
			spin_unlock(&proc->asts_spin);
			return -EAGAIN;
		}

		add_wait_queue(&proc->wait, &wait);

		/* sleep with asts_spin dropped; the list is rechecked with
		   the lock held after every wakeup */
	repeat:
		set_current_state(TASK_INTERRUPTIBLE);
		if (list_empty(&proc->asts) && !signal_pending(current)) {
			spin_unlock(&proc->asts_spin);
			schedule();
			spin_lock(&proc->asts_spin);
			goto repeat;
		}
		set_current_state(TASK_RUNNING);
		remove_wait_queue(&proc->wait, &wait);

		if (signal_pending(current)) {
			spin_unlock(&proc->asts_spin);
			return -ERESTARTSYS;
		}
	}

	/* if we empty lkb_callbacks, we don't want to unlock the spinlock
	   without removing lkb_cb_list; so empty lkb_cb_list is always
	   consistent with empty lkb_callbacks */

	lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_cb_list);

	/* rem_lkb_callback sets a new lkb_last_cast */
	old_mode = lkb->lkb_last_cast.mode;

	rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid);
	if (rv < 0) {
		/* this shouldn't happen; lkb should have been removed from
		   list when resid was zero */
		log_print("dlm_rem_lkb_callback empty %x", lkb->lkb_id);
		list_del_init(&lkb->lkb_cb_list);
		spin_unlock(&proc->asts_spin);
		/* removes ref for proc->asts, may cause lkb to be freed */
		dlm_put_lkb(lkb);
		goto try_another;
	}
	/* resid == 0: that was the lkb's last queued callback, so take the
	   lkb off proc->asts; the list's reference is dropped further down */
	if (!resid)
		list_del_init(&lkb->lkb_cb_list);
	spin_unlock(&proc->asts_spin);

	/* a callback flagged SKIP is not reported; look for the next one */
	if (cb.flags & DLM_CB_SKIP) {
		/* removes ref for proc->asts, may cause lkb to be freed */
		if (!resid)
			dlm_put_lkb(lkb);
		goto try_another;
	}

	if (cb.flags & DLM_CB_CAST) {
		new_mode = cb.mode;

		/* copy the lvb only for a successful completion on a lock
		   that has one, and only if the dlm_lvb_operations table has
		   a nonzero entry for this old/new mode pair */
		if (!cb.sb_status && lkb->lkb_lksb->sb_lvbptr &&
		    dlm_lvb_operations[old_mode + 1][new_mode + 1])
			copy_lvb = 1;

		lkb->lkb_lksb->sb_status = cb.sb_status;
		lkb->lkb_lksb->sb_flags = cb.sb_flags;
	}

	rv = copy_result_to_user(lkb->lkb_ua,
				 test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
				 cb.flags, cb.mode, copy_lvb, buf, count);

	/* removes ref for proc->asts, may cause lkb to be freed */
	if (!resid)
		dlm_put_lkb(lkb);

	return rv;
}
888
889static unsigned int device_poll(struct file *file, poll_table *wait)
890{
891	struct dlm_user_proc *proc = file->private_data;
892
893	poll_wait(file, &proc->wait, wait);
894
895	spin_lock(&proc->asts_spin);
896	if (!list_empty(&proc->asts)) {
897		spin_unlock(&proc->asts_spin);
898		return POLLIN | POLLRDNORM;
899	}
900	spin_unlock(&proc->asts_spin);
901	return 0;
902}
903
904int dlm_user_daemon_available(void)
905{
906	/* dlm_controld hasn't started (or, has started, but not
907	   properly populated configfs) */
908
909	if (!dlm_our_nodeid())
910		return 0;
911
912	/* This is to deal with versions of dlm_controld that don't
913	   know about the monitor device.  We assume that if the
914	   dlm_controld was started (above), but the monitor device
915	   was never opened, that it's an old version.  dlm_controld
916	   should open the monitor device before populating configfs. */
917
918	if (dlm_monitor_unused)
919		return 1;
920
921	return atomic_read(&dlm_monitor_opened) ? 1 : 0;
922}
923
/*
 * Open handler for the control device.  private_data is cleared so that
 * device_read()/device_write() see a NULL proc, which is how they tell
 * the control device from a lockspace device.  Always returns 0.
 */
static int ctl_device_open(struct inode *inode, struct file *file)
{
	file->private_data = NULL;
	return 0;
}
929
/* Release handler for the control device; nothing to undo.  Returns 0. */
static int ctl_device_close(struct inode *inode, struct file *file)
{
	return 0;
}
934
/*
 * Open handler for the monitor device: counts the open and records that
 * the monitor device has been used (both are read by
 * dlm_user_daemon_available()).  Always returns 0.
 */
static int monitor_device_open(struct inode *inode, struct file *file)
{
	atomic_inc(&dlm_monitor_opened);
	dlm_monitor_unused = 0;
	return 0;
}
941
/*
 * Release handler for the monitor device.  When the open count drops to
 * zero, dlm_stop_lockspaces() is called.  Always returns 0.
 */
static int monitor_device_close(struct inode *inode, struct file *file)
{
	if (atomic_dec_and_test(&dlm_monitor_opened))
		dlm_stop_lockspaces();
	return 0;
}
948
949static const struct file_operations device_fops = {
950	.open    = device_open,
951	.release = device_close,
952	.read    = device_read,
953	.write   = device_write,
954	.poll    = device_poll,
955	.owner   = THIS_MODULE,
956	.llseek  = noop_llseek,
957};
958
959static const struct file_operations ctl_device_fops = {
960	.open    = ctl_device_open,
961	.release = ctl_device_close,
962	.read    = device_read,
963	.write   = device_write,
964	.owner   = THIS_MODULE,
965	.llseek  = noop_llseek,
966};
967
968static struct miscdevice ctl_device = {
969	.name  = "dlm-control",
970	.fops  = &ctl_device_fops,
971	.minor = MISC_DYNAMIC_MINOR,
972};
973
974static const struct file_operations monitor_device_fops = {
975	.open    = monitor_device_open,
976	.release = monitor_device_close,
977	.owner   = THIS_MODULE,
978	.llseek  = noop_llseek,
979};
980
981static struct miscdevice monitor_device = {
982	.name  = "dlm-monitor",
983	.fops  = &monitor_device_fops,
984	.minor = MISC_DYNAMIC_MINOR,
985};
986
987int __init dlm_user_init(void)
988{
989	int error;
990
991	atomic_set(&dlm_monitor_opened, 0);
992
993	error = misc_register(&ctl_device);
994	if (error) {
995		log_print("misc_register failed for control device");
996		goto out;
997	}
998
999	error = misc_register(&monitor_device);
1000	if (error) {
1001		log_print("misc_register failed for monitor device");
1002		misc_deregister(&ctl_device);
1003	}
1004 out:
1005	return error;
1006}
1007
/* Deregister the two misc devices registered by dlm_user_init(). */
void dlm_user_exit(void)
{
	misc_deregister(&ctl_device);
	misc_deregister(&monitor_device);
}
1013
1014