1/*
2*  Copyright (c) 2001 The Regents of the University of Michigan.
3*  All rights reserved.
4*
5*  Kendrick Smith <kmsmith@umich.edu>
6*  Andy Adamson <kandros@umich.edu>
7*
8*  Redistribution and use in source and binary forms, with or without
9*  modification, are permitted provided that the following conditions
10*  are met:
11*
12*  1. Redistributions of source code must retain the above copyright
13*     notice, this list of conditions and the following disclaimer.
14*  2. Redistributions in binary form must reproduce the above copyright
15*     notice, this list of conditions and the following disclaimer in the
16*     documentation and/or other materials provided with the distribution.
17*  3. Neither the name of the University nor the names of its
18*     contributors may be used to endorse or promote products derived
19*     from this software without specific prior written permission.
20*
21*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
22*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
23*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32*
33*/
34
35#include <linux/file.h>
36#include <linux/fs.h>
37#include <linux/slab.h>
38#include <linux/namei.h>
39#include <linux/swap.h>
40#include <linux/pagemap.h>
41#include <linux/ratelimit.h>
42#include <linux/sunrpc/svcauth_gss.h>
43#include <linux/sunrpc/addr.h>
44#include <linux/jhash.h>
45#include "xdr4.h"
46#include "xdr4cb.h"
47#include "vfs.h"
48#include "current_stateid.h"
49
50#include "netns.h"
51#include "pnfs.h"
52
53#define NFSDDBG_FACILITY                NFSDDBG_PROC
54
55#define all_ones {{~0,~0},~0}
56static const stateid_t one_stateid = {
57	.si_generation = ~0,
58	.si_opaque = all_ones,
59};
60static const stateid_t zero_stateid = {
61	/* all fields zero */
62};
63static const stateid_t currentstateid = {
64	.si_generation = 1,
65};
66
67static u64 current_sessionid = 1;
68
69#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
70#define ONE_STATEID(stateid)  (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
71#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t)))
72
73/* forward declarations */
74static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
75static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
76
77/* Locking: */
78
79/*
80 * Currently used for the del_recall_lru and file hash table.  In an
81 * effort to decrease the scope of the client_mutex, this spinlock may
82 * eventually cover more:
83 */
84static DEFINE_SPINLOCK(state_lock);
85
86/*
87 * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for
88 * the refcount on the open stateid to drop.
89 */
90static DECLARE_WAIT_QUEUE_HEAD(close_wq);
91
92static struct kmem_cache *openowner_slab;
93static struct kmem_cache *lockowner_slab;
94static struct kmem_cache *file_slab;
95static struct kmem_cache *stateid_slab;
96static struct kmem_cache *deleg_slab;
97static struct kmem_cache *odstate_slab;
98
99static void free_session(struct nfsd4_session *);
100
101static struct nfsd4_callback_ops nfsd4_cb_recall_ops;
102
103static bool is_session_dead(struct nfsd4_session *ses)
104{
105	return ses->se_flags & NFS4_SESSION_DEAD;
106}
107
108static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me)
109{
110	if (atomic_read(&ses->se_ref) > ref_held_by_me)
111		return nfserr_jukebox;
112	ses->se_flags |= NFS4_SESSION_DEAD;
113	return nfs_ok;
114}
115
116static bool is_client_expired(struct nfs4_client *clp)
117{
118	return clp->cl_time == 0;
119}
120
121static __be32 get_client_locked(struct nfs4_client *clp)
122{
123	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
124
125	lockdep_assert_held(&nn->client_lock);
126
127	if (is_client_expired(clp))
128		return nfserr_expired;
129	atomic_inc(&clp->cl_refcount);
130	return nfs_ok;
131}
132
133/* must be called under the client_lock */
134static inline void
135renew_client_locked(struct nfs4_client *clp)
136{
137	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
138
139	if (is_client_expired(clp)) {
140		WARN_ON(1);
141		printk("%s: client (clientid %08x/%08x) already expired\n",
142			__func__,
143			clp->cl_clientid.cl_boot,
144			clp->cl_clientid.cl_id);
145		return;
146	}
147
148	dprintk("renewing client (clientid %08x/%08x)\n",
149			clp->cl_clientid.cl_boot,
150			clp->cl_clientid.cl_id);
151	list_move_tail(&clp->cl_lru, &nn->client_lru);
152	clp->cl_time = get_seconds();
153}
154
155static void put_client_renew_locked(struct nfs4_client *clp)
156{
157	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
158
159	lockdep_assert_held(&nn->client_lock);
160
161	if (!atomic_dec_and_test(&clp->cl_refcount))
162		return;
163	if (!is_client_expired(clp))
164		renew_client_locked(clp);
165}
166
167static void put_client_renew(struct nfs4_client *clp)
168{
169	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
170
171	if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock))
172		return;
173	if (!is_client_expired(clp))
174		renew_client_locked(clp);
175	spin_unlock(&nn->client_lock);
176}
177
178static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses)
179{
180	__be32 status;
181
182	if (is_session_dead(ses))
183		return nfserr_badsession;
184	status = get_client_locked(ses->se_client);
185	if (status)
186		return status;
187	atomic_inc(&ses->se_ref);
188	return nfs_ok;
189}
190
191static void nfsd4_put_session_locked(struct nfsd4_session *ses)
192{
193	struct nfs4_client *clp = ses->se_client;
194	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
195
196	lockdep_assert_held(&nn->client_lock);
197
198	if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses))
199		free_session(ses);
200	put_client_renew_locked(clp);
201}
202
203static void nfsd4_put_session(struct nfsd4_session *ses)
204{
205	struct nfs4_client *clp = ses->se_client;
206	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
207
208	spin_lock(&nn->client_lock);
209	nfsd4_put_session_locked(ses);
210	spin_unlock(&nn->client_lock);
211}
212
213static inline struct nfs4_stateowner *
214nfs4_get_stateowner(struct nfs4_stateowner *sop)
215{
216	atomic_inc(&sop->so_count);
217	return sop;
218}
219
220static int
221same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner)
222{
223	return (sop->so_owner.len == owner->len) &&
224		0 == memcmp(sop->so_owner.data, owner->data, owner->len);
225}
226
227static struct nfs4_openowner *
228find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
229			struct nfs4_client *clp)
230{
231	struct nfs4_stateowner *so;
232
233	lockdep_assert_held(&clp->cl_lock);
234
235	list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[hashval],
236			    so_strhash) {
237		if (!so->so_is_open_owner)
238			continue;
239		if (same_owner_str(so, &open->op_owner))
240			return openowner(nfs4_get_stateowner(so));
241	}
242	return NULL;
243}
244
245static struct nfs4_openowner *
246find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
247			struct nfs4_client *clp)
248{
249	struct nfs4_openowner *oo;
250
251	spin_lock(&clp->cl_lock);
252	oo = find_openstateowner_str_locked(hashval, open, clp);
253	spin_unlock(&clp->cl_lock);
254	return oo;
255}
256
257static inline u32
258opaque_hashval(const void *ptr, int nbytes)
259{
260	unsigned char *cptr = (unsigned char *) ptr;
261
262	u32 x = 0;
263	while (nbytes--) {
264		x *= 37;
265		x += *cptr++;
266	}
267	return x;
268}
269
270static void nfsd4_free_file_rcu(struct rcu_head *rcu)
271{
272	struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu);
273
274	kmem_cache_free(file_slab, fp);
275}
276
277void
278put_nfs4_file(struct nfs4_file *fi)
279{
280	might_lock(&state_lock);
281
282	if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
283		hlist_del_rcu(&fi->fi_hash);
284		spin_unlock(&state_lock);
285		WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
286		WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
287		call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
288	}
289}
290
291static struct file *
292__nfs4_get_fd(struct nfs4_file *f, int oflag)
293{
294	if (f->fi_fds[oflag])
295		return get_file(f->fi_fds[oflag]);
296	return NULL;
297}
298
299static struct file *
300find_writeable_file_locked(struct nfs4_file *f)
301{
302	struct file *ret;
303
304	lockdep_assert_held(&f->fi_lock);
305
306	ret = __nfs4_get_fd(f, O_WRONLY);
307	if (!ret)
308		ret = __nfs4_get_fd(f, O_RDWR);
309	return ret;
310}
311
312static struct file *
313find_writeable_file(struct nfs4_file *f)
314{
315	struct file *ret;
316
317	spin_lock(&f->fi_lock);
318	ret = find_writeable_file_locked(f);
319	spin_unlock(&f->fi_lock);
320
321	return ret;
322}
323
324static struct file *find_readable_file_locked(struct nfs4_file *f)
325{
326	struct file *ret;
327
328	lockdep_assert_held(&f->fi_lock);
329
330	ret = __nfs4_get_fd(f, O_RDONLY);
331	if (!ret)
332		ret = __nfs4_get_fd(f, O_RDWR);
333	return ret;
334}
335
336static struct file *
337find_readable_file(struct nfs4_file *f)
338{
339	struct file *ret;
340
341	spin_lock(&f->fi_lock);
342	ret = find_readable_file_locked(f);
343	spin_unlock(&f->fi_lock);
344
345	return ret;
346}
347
348struct file *
349find_any_file(struct nfs4_file *f)
350{
351	struct file *ret;
352
353	spin_lock(&f->fi_lock);
354	ret = __nfs4_get_fd(f, O_RDWR);
355	if (!ret) {
356		ret = __nfs4_get_fd(f, O_WRONLY);
357		if (!ret)
358			ret = __nfs4_get_fd(f, O_RDONLY);
359	}
360	spin_unlock(&f->fi_lock);
361	return ret;
362}
363
364static atomic_long_t num_delegations;
365unsigned long max_delegations;
366
367/*
368 * Open owner state (share locks)
369 */
370
371/* hash tables for lock and open owners */
372#define OWNER_HASH_BITS              8
373#define OWNER_HASH_SIZE             (1 << OWNER_HASH_BITS)
374#define OWNER_HASH_MASK             (OWNER_HASH_SIZE - 1)
375
376static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
377{
378	unsigned int ret;
379
380	ret = opaque_hashval(ownername->data, ownername->len);
381	return ret & OWNER_HASH_MASK;
382}
383
384/* hash table for nfs4_file */
385#define FILE_HASH_BITS                   8
386#define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)
387
388static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh)
389{
390	return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0);
391}
392
393static unsigned int file_hashval(struct knfsd_fh *fh)
394{
395	return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1);
396}
397
398static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
399
400static void
401__nfs4_file_get_access(struct nfs4_file *fp, u32 access)
402{
403	lockdep_assert_held(&fp->fi_lock);
404
405	if (access & NFS4_SHARE_ACCESS_WRITE)
406		atomic_inc(&fp->fi_access[O_WRONLY]);
407	if (access & NFS4_SHARE_ACCESS_READ)
408		atomic_inc(&fp->fi_access[O_RDONLY]);
409}
410
411static __be32
412nfs4_file_get_access(struct nfs4_file *fp, u32 access)
413{
414	lockdep_assert_held(&fp->fi_lock);
415
416	/* Does this access mode make sense? */
417	if (access & ~NFS4_SHARE_ACCESS_BOTH)
418		return nfserr_inval;
419
420	/* Does it conflict with a deny mode already set? */
421	if ((access & fp->fi_share_deny) != 0)
422		return nfserr_share_denied;
423
424	__nfs4_file_get_access(fp, access);
425	return nfs_ok;
426}
427
428static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny)
429{
430	/* Common case is that there is no deny mode. */
431	if (deny) {
432		/* Does this deny mode make sense? */
433		if (deny & ~NFS4_SHARE_DENY_BOTH)
434			return nfserr_inval;
435
436		if ((deny & NFS4_SHARE_DENY_READ) &&
437		    atomic_read(&fp->fi_access[O_RDONLY]))
438			return nfserr_share_denied;
439
440		if ((deny & NFS4_SHARE_DENY_WRITE) &&
441		    atomic_read(&fp->fi_access[O_WRONLY]))
442			return nfserr_share_denied;
443	}
444	return nfs_ok;
445}
446
447static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
448{
449	might_lock(&fp->fi_lock);
450
451	if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
452		struct file *f1 = NULL;
453		struct file *f2 = NULL;
454
455		swap(f1, fp->fi_fds[oflag]);
456		if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
457			swap(f2, fp->fi_fds[O_RDWR]);
458		spin_unlock(&fp->fi_lock);
459		if (f1)
460			fput(f1);
461		if (f2)
462			fput(f2);
463	}
464}
465
466static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
467{
468	WARN_ON_ONCE(access & ~NFS4_SHARE_ACCESS_BOTH);
469
470	if (access & NFS4_SHARE_ACCESS_WRITE)
471		__nfs4_file_put_access(fp, O_WRONLY);
472	if (access & NFS4_SHARE_ACCESS_READ)
473		__nfs4_file_put_access(fp, O_RDONLY);
474}
475
476/*
477 * Allocate a new open/delegation state counter. This is needed for
478 * pNFS for proper return on close semantics.
479 *
480 * Note that we only allocate it for pNFS-enabled exports, otherwise
481 * all pointers to struct nfs4_clnt_odstate are always NULL.
482 */
483static struct nfs4_clnt_odstate *
484alloc_clnt_odstate(struct nfs4_client *clp)
485{
486	struct nfs4_clnt_odstate *co;
487
488	co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL);
489	if (co) {
490		co->co_client = clp;
491		atomic_set(&co->co_odcount, 1);
492	}
493	return co;
494}
495
496static void
497hash_clnt_odstate_locked(struct nfs4_clnt_odstate *co)
498{
499	struct nfs4_file *fp = co->co_file;
500
501	lockdep_assert_held(&fp->fi_lock);
502	list_add(&co->co_perfile, &fp->fi_clnt_odstate);
503}
504
505static inline void
506get_clnt_odstate(struct nfs4_clnt_odstate *co)
507{
508	if (co)
509		atomic_inc(&co->co_odcount);
510}
511
512static void
513put_clnt_odstate(struct nfs4_clnt_odstate *co)
514{
515	struct nfs4_file *fp;
516
517	if (!co)
518		return;
519
520	fp = co->co_file;
521	if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
522		list_del(&co->co_perfile);
523		spin_unlock(&fp->fi_lock);
524
525		nfsd4_return_all_file_layouts(co->co_client, fp);
526		kmem_cache_free(odstate_slab, co);
527	}
528}
529
530static struct nfs4_clnt_odstate *
531find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new)
532{
533	struct nfs4_clnt_odstate *co;
534	struct nfs4_client *cl;
535
536	if (!new)
537		return NULL;
538
539	cl = new->co_client;
540
541	spin_lock(&fp->fi_lock);
542	list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) {
543		if (co->co_client == cl) {
544			get_clnt_odstate(co);
545			goto out;
546		}
547	}
548	co = new;
549	co->co_file = fp;
550	hash_clnt_odstate_locked(new);
551out:
552	spin_unlock(&fp->fi_lock);
553	return co;
554}
555
556struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
557					 struct kmem_cache *slab)
558{
559	struct nfs4_stid *stid;
560	int new_id;
561
562	stid = kmem_cache_zalloc(slab, GFP_KERNEL);
563	if (!stid)
564		return NULL;
565
566	idr_preload(GFP_KERNEL);
567	spin_lock(&cl->cl_lock);
568	new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT);
569	spin_unlock(&cl->cl_lock);
570	idr_preload_end();
571	if (new_id < 0)
572		goto out_free;
573	stid->sc_client = cl;
574	stid->sc_stateid.si_opaque.so_id = new_id;
575	stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
576	/* Will be incremented before return to client: */
577	atomic_set(&stid->sc_count, 1);
578
579	/*
580	 * It shouldn't be a problem to reuse an opaque stateid value.
581	 * I don't think it is for 4.1.  But with 4.0 I worry that, for
582	 * example, a stray write retransmission could be accepted by
583	 * the server when it should have been rejected.  Therefore,
584	 * adopt a trick from the sctp code to attempt to maximize the
585	 * amount of time until an id is reused, by ensuring they always
586	 * "increase" (mod INT_MAX):
587	 */
588	return stid;
589out_free:
590	kmem_cache_free(slab, stid);
591	return NULL;
592}
593
594static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
595{
596	struct nfs4_stid *stid;
597	struct nfs4_ol_stateid *stp;
598
599	stid = nfs4_alloc_stid(clp, stateid_slab);
600	if (!stid)
601		return NULL;
602
603	stp = openlockstateid(stid);
604	stp->st_stid.sc_free = nfs4_free_ol_stateid;
605	return stp;
606}
607
608static void nfs4_free_deleg(struct nfs4_stid *stid)
609{
610	kmem_cache_free(deleg_slab, stid);
611	atomic_long_dec(&num_delegations);
612}
613
614/*
615 * When we recall a delegation, we should be careful not to hand it
616 * out again straight away.
617 * To ensure this we keep a pair of bloom filters ('new' and 'old')
618 * in which the filehandles of recalled delegations are "stored".
619 * If a filehandle appear in either filter, a delegation is blocked.
620 * When a delegation is recalled, the filehandle is stored in the "new"
621 * filter.
622 * Every 30 seconds we swap the filters and clear the "new" one,
623 * unless both are empty of course.
624 *
625 * Each filter is 256 bits.  We hash the filehandle to 32bit and use the
626 * low 3 bytes as hash-table indices.
627 *
628 * 'blocked_delegations_lock', which is always taken in block_delegations(),
629 * is used to manage concurrent access.  Testing does not need the lock
630 * except when swapping the two filters.
631 */
632static DEFINE_SPINLOCK(blocked_delegations_lock);
633static struct bloom_pair {
634	int	entries, old_entries;
635	time_t	swap_time;
636	int	new; /* index into 'set' */
637	DECLARE_BITMAP(set[2], 256);
638} blocked_delegations;
639
640static int delegation_blocked(struct knfsd_fh *fh)
641{
642	u32 hash;
643	struct bloom_pair *bd = &blocked_delegations;
644
645	if (bd->entries == 0)
646		return 0;
647	if (seconds_since_boot() - bd->swap_time > 30) {
648		spin_lock(&blocked_delegations_lock);
649		if (seconds_since_boot() - bd->swap_time > 30) {
650			bd->entries -= bd->old_entries;
651			bd->old_entries = bd->entries;
652			memset(bd->set[bd->new], 0,
653			       sizeof(bd->set[0]));
654			bd->new = 1-bd->new;
655			bd->swap_time = seconds_since_boot();
656		}
657		spin_unlock(&blocked_delegations_lock);
658	}
659	hash = jhash(&fh->fh_base, fh->fh_size, 0);
660	if (test_bit(hash&255, bd->set[0]) &&
661	    test_bit((hash>>8)&255, bd->set[0]) &&
662	    test_bit((hash>>16)&255, bd->set[0]))
663		return 1;
664
665	if (test_bit(hash&255, bd->set[1]) &&
666	    test_bit((hash>>8)&255, bd->set[1]) &&
667	    test_bit((hash>>16)&255, bd->set[1]))
668		return 1;
669
670	return 0;
671}
672
673static void block_delegations(struct knfsd_fh *fh)
674{
675	u32 hash;
676	struct bloom_pair *bd = &blocked_delegations;
677
678	hash = jhash(&fh->fh_base, fh->fh_size, 0);
679
680	spin_lock(&blocked_delegations_lock);
681	__set_bit(hash&255, bd->set[bd->new]);
682	__set_bit((hash>>8)&255, bd->set[bd->new]);
683	__set_bit((hash>>16)&255, bd->set[bd->new]);
684	if (bd->entries == 0)
685		bd->swap_time = seconds_since_boot();
686	bd->entries += 1;
687	spin_unlock(&blocked_delegations_lock);
688}
689
690static struct nfs4_delegation *
691alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
692		 struct nfs4_clnt_odstate *odstate)
693{
694	struct nfs4_delegation *dp;
695	long n;
696
697	dprintk("NFSD alloc_init_deleg\n");
698	n = atomic_long_inc_return(&num_delegations);
699	if (n < 0 || n > max_delegations)
700		goto out_dec;
701	if (delegation_blocked(&current_fh->fh_handle))
702		goto out_dec;
703	dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
704	if (dp == NULL)
705		goto out_dec;
706
707	dp->dl_stid.sc_free = nfs4_free_deleg;
708	/*
709	 * delegation seqid's are never incremented.  The 4.1 special
710	 * meaning of seqid 0 isn't meaningful, really, but let's avoid
711	 * 0 anyway just for consistency and use 1:
712	 */
713	dp->dl_stid.sc_stateid.si_generation = 1;
714	INIT_LIST_HEAD(&dp->dl_perfile);
715	INIT_LIST_HEAD(&dp->dl_perclnt);
716	INIT_LIST_HEAD(&dp->dl_recall_lru);
717	dp->dl_clnt_odstate = odstate;
718	get_clnt_odstate(odstate);
719	dp->dl_type = NFS4_OPEN_DELEGATE_READ;
720	dp->dl_retries = 1;
721	nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
722		      &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
723	return dp;
724out_dec:
725	atomic_long_dec(&num_delegations);
726	return NULL;
727}
728
729void
730nfs4_put_stid(struct nfs4_stid *s)
731{
732	struct nfs4_file *fp = s->sc_file;
733	struct nfs4_client *clp = s->sc_client;
734
735	might_lock(&clp->cl_lock);
736
737	if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
738		wake_up_all(&close_wq);
739		return;
740	}
741	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
742	spin_unlock(&clp->cl_lock);
743	s->sc_free(s);
744	if (fp)
745		put_nfs4_file(fp);
746}
747
748static void nfs4_put_deleg_lease(struct nfs4_file *fp)
749{
750	struct file *filp = NULL;
751
752	spin_lock(&fp->fi_lock);
753	if (fp->fi_deleg_file && --fp->fi_delegees == 0)
754		swap(filp, fp->fi_deleg_file);
755	spin_unlock(&fp->fi_lock);
756
757	if (filp) {
758		vfs_setlease(filp, F_UNLCK, NULL, (void **)&fp);
759		fput(filp);
760	}
761}
762
763void nfs4_unhash_stid(struct nfs4_stid *s)
764{
765	s->sc_type = 0;
766}
767
768/**
769 * nfs4_get_existing_delegation - Discover if this delegation already exists
770 * @clp:     a pointer to the nfs4_client we're granting a delegation to
771 * @fp:      a pointer to the nfs4_file we're granting a delegation on
772 *
773 * Return:
774 *      On success: NULL if an existing delegation was not found.
775 *
776 *      On error: -EAGAIN if one was previously granted to this nfs4_client
777 *                 for this nfs4_file.
778 *
779 */
780
781static int
782nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
783{
784	struct nfs4_delegation *searchdp = NULL;
785	struct nfs4_client *searchclp = NULL;
786
787	lockdep_assert_held(&state_lock);
788	lockdep_assert_held(&fp->fi_lock);
789
790	list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) {
791		searchclp = searchdp->dl_stid.sc_client;
792		if (clp == searchclp) {
793			return -EAGAIN;
794		}
795	}
796	return 0;
797}
798
799/**
800 * hash_delegation_locked - Add a delegation to the appropriate lists
801 * @dp:     a pointer to the nfs4_delegation we are adding.
802 * @fp:     a pointer to the nfs4_file we're granting a delegation on
803 *
804 * Return:
805 *      On success: NULL if the delegation was successfully hashed.
806 *
807 *      On error: -EAGAIN if one was previously granted to this
808 *                 nfs4_client for this nfs4_file. Delegation is not hashed.
809 *
810 */
811
812static int
813hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
814{
815	int status;
816	struct nfs4_client *clp = dp->dl_stid.sc_client;
817
818	lockdep_assert_held(&state_lock);
819	lockdep_assert_held(&fp->fi_lock);
820
821	status = nfs4_get_existing_delegation(clp, fp);
822	if (status)
823		return status;
824	++fp->fi_delegees;
825	atomic_inc(&dp->dl_stid.sc_count);
826	dp->dl_stid.sc_type = NFS4_DELEG_STID;
827	list_add(&dp->dl_perfile, &fp->fi_delegations);
828	list_add(&dp->dl_perclnt, &clp->cl_delegations);
829	return 0;
830}
831
832static bool
833unhash_delegation_locked(struct nfs4_delegation *dp)
834{
835	struct nfs4_file *fp = dp->dl_stid.sc_file;
836
837	lockdep_assert_held(&state_lock);
838
839	if (list_empty(&dp->dl_perfile))
840		return false;
841
842	dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
843	/* Ensure that deleg break won't try to requeue it */
844	++dp->dl_time;
845	spin_lock(&fp->fi_lock);
846	list_del_init(&dp->dl_perclnt);
847	list_del_init(&dp->dl_recall_lru);
848	list_del_init(&dp->dl_perfile);
849	spin_unlock(&fp->fi_lock);
850	return true;
851}
852
853static void destroy_delegation(struct nfs4_delegation *dp)
854{
855	bool unhashed;
856
857	spin_lock(&state_lock);
858	unhashed = unhash_delegation_locked(dp);
859	spin_unlock(&state_lock);
860	if (unhashed) {
861		put_clnt_odstate(dp->dl_clnt_odstate);
862		nfs4_put_deleg_lease(dp->dl_stid.sc_file);
863		nfs4_put_stid(&dp->dl_stid);
864	}
865}
866
867static void revoke_delegation(struct nfs4_delegation *dp)
868{
869	struct nfs4_client *clp = dp->dl_stid.sc_client;
870
871	WARN_ON(!list_empty(&dp->dl_recall_lru));
872
873	put_clnt_odstate(dp->dl_clnt_odstate);
874	nfs4_put_deleg_lease(dp->dl_stid.sc_file);
875
876	if (clp->cl_minorversion == 0)
877		nfs4_put_stid(&dp->dl_stid);
878	else {
879		dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
880		spin_lock(&clp->cl_lock);
881		list_add(&dp->dl_recall_lru, &clp->cl_revoked);
882		spin_unlock(&clp->cl_lock);
883	}
884}
885
886/*
887 * SETCLIENTID state
888 */
889
890static unsigned int clientid_hashval(u32 id)
891{
892	return id & CLIENT_HASH_MASK;
893}
894
895static unsigned int clientstr_hashval(const char *name)
896{
897	return opaque_hashval(name, 8) & CLIENT_HASH_MASK;
898}
899
900/*
901 * We store the NONE, READ, WRITE, and BOTH bits separately in the
902 * st_{access,deny}_bmap field of the stateid, in order to track not
903 * only what share bits are currently in force, but also what
904 * combinations of share bits previous opens have used.  This allows us
905 * to enforce the recommendation of rfc 3530 14.2.19 that the server
906 * return an error if the client attempt to downgrade to a combination
907 * of share bits not explicable by closing some of its previous opens.
908 *
909 * XXX: This enforcement is actually incomplete, since we don't keep
910 * track of access/deny bit combinations; so, e.g., we allow:
911 *
912 *	OPEN allow read, deny write
913 *	OPEN allow both, deny none
914 *	DOWNGRADE allow read, deny none
915 *
916 * which we should reject.
917 */
918static unsigned int
919bmap_to_share_mode(unsigned long bmap) {
920	int i;
921	unsigned int access = 0;
922
923	for (i = 1; i < 4; i++) {
924		if (test_bit(i, &bmap))
925			access |= i;
926	}
927	return access;
928}
929
930/* set share access for a given stateid */
931static inline void
932set_access(u32 access, struct nfs4_ol_stateid *stp)
933{
934	unsigned char mask = 1 << access;
935
936	WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
937	stp->st_access_bmap |= mask;
938}
939
940/* clear share access for a given stateid */
941static inline void
942clear_access(u32 access, struct nfs4_ol_stateid *stp)
943{
944	unsigned char mask = 1 << access;
945
946	WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
947	stp->st_access_bmap &= ~mask;
948}
949
950/* test whether a given stateid has access */
951static inline bool
952test_access(u32 access, struct nfs4_ol_stateid *stp)
953{
954	unsigned char mask = 1 << access;
955
956	return (bool)(stp->st_access_bmap & mask);
957}
958
959/* set share deny for a given stateid */
960static inline void
961set_deny(u32 deny, struct nfs4_ol_stateid *stp)
962{
963	unsigned char mask = 1 << deny;
964
965	WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
966	stp->st_deny_bmap |= mask;
967}
968
969/* clear share deny for a given stateid */
970static inline void
971clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
972{
973	unsigned char mask = 1 << deny;
974
975	WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
976	stp->st_deny_bmap &= ~mask;
977}
978
979/* test whether a given stateid is denying specific access */
980static inline bool
981test_deny(u32 deny, struct nfs4_ol_stateid *stp)
982{
983	unsigned char mask = 1 << deny;
984
985	return (bool)(stp->st_deny_bmap & mask);
986}
987
988static int nfs4_access_to_omode(u32 access)
989{
990	switch (access & NFS4_SHARE_ACCESS_BOTH) {
991	case NFS4_SHARE_ACCESS_READ:
992		return O_RDONLY;
993	case NFS4_SHARE_ACCESS_WRITE:
994		return O_WRONLY;
995	case NFS4_SHARE_ACCESS_BOTH:
996		return O_RDWR;
997	}
998	WARN_ON_ONCE(1);
999	return O_RDONLY;
1000}
1001
1002/*
1003 * A stateid that had a deny mode associated with it is being released
1004 * or downgraded. Recalculate the deny mode on the file.
1005 */
1006static void
1007recalculate_deny_mode(struct nfs4_file *fp)
1008{
1009	struct nfs4_ol_stateid *stp;
1010
1011	spin_lock(&fp->fi_lock);
1012	fp->fi_share_deny = 0;
1013	list_for_each_entry(stp, &fp->fi_stateids, st_perfile)
1014		fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap);
1015	spin_unlock(&fp->fi_lock);
1016}
1017
1018static void
1019reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp)
1020{
1021	int i;
1022	bool change = false;
1023
1024	for (i = 1; i < 4; i++) {
1025		if ((i & deny) != i) {
1026			change = true;
1027			clear_deny(i, stp);
1028		}
1029	}
1030
1031	/* Recalculate per-file deny mode if there was a change */
1032	if (change)
1033		recalculate_deny_mode(stp->st_stid.sc_file);
1034}
1035
1036/* release all access and file references for a given stateid */
1037static void
1038release_all_access(struct nfs4_ol_stateid *stp)
1039{
1040	int i;
1041	struct nfs4_file *fp = stp->st_stid.sc_file;
1042
1043	if (fp && stp->st_deny_bmap != 0)
1044		recalculate_deny_mode(fp);
1045
1046	for (i = 1; i < 4; i++) {
1047		if (test_access(i, stp))
1048			nfs4_file_put_access(stp->st_stid.sc_file, i);
1049		clear_access(i, stp);
1050	}
1051}
1052
1053static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
1054{
1055	struct nfs4_client *clp = sop->so_client;
1056
1057	might_lock(&clp->cl_lock);
1058
1059	if (!atomic_dec_and_lock(&sop->so_count, &clp->cl_lock))
1060		return;
1061	sop->so_ops->so_unhash(sop);
1062	spin_unlock(&clp->cl_lock);
1063	kfree(sop->so_owner.data);
1064	sop->so_ops->so_free(sop);
1065}
1066
1067static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp)
1068{
1069	struct nfs4_file *fp = stp->st_stid.sc_file;
1070
1071	lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock);
1072
1073	if (list_empty(&stp->st_perfile))
1074		return false;
1075
1076	spin_lock(&fp->fi_lock);
1077	list_del_init(&stp->st_perfile);
1078	spin_unlock(&fp->fi_lock);
1079	list_del(&stp->st_perstateowner);
1080	return true;
1081}
1082
1083static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
1084{
1085	struct nfs4_ol_stateid *stp = openlockstateid(stid);
1086
1087	put_clnt_odstate(stp->st_clnt_odstate);
1088	release_all_access(stp);
1089	if (stp->st_stateowner)
1090		nfs4_put_stateowner(stp->st_stateowner);
1091	kmem_cache_free(stateid_slab, stid);
1092}
1093
1094static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
1095{
1096	struct nfs4_ol_stateid *stp = openlockstateid(stid);
1097	struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
1098	struct file *file;
1099
1100	file = find_any_file(stp->st_stid.sc_file);
1101	if (file)
1102		filp_close(file, (fl_owner_t)lo);
1103	nfs4_free_ol_stateid(stid);
1104}
1105
1106/*
1107 * Put the persistent reference to an already unhashed generic stateid, while
1108 * holding the cl_lock. If it's the last reference, then put it onto the
1109 * reaplist for later destruction.
1110 */
1111static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
1112				       struct list_head *reaplist)
1113{
1114	struct nfs4_stid *s = &stp->st_stid;
1115	struct nfs4_client *clp = s->sc_client;
1116
1117	lockdep_assert_held(&clp->cl_lock);
1118
1119	WARN_ON_ONCE(!list_empty(&stp->st_locks));
1120
1121	if (!atomic_dec_and_test(&s->sc_count)) {
1122		wake_up_all(&close_wq);
1123		return;
1124	}
1125
1126	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
1127	list_add(&stp->st_locks, reaplist);
1128}
1129
1130static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
1131{
1132	struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
1133
1134	lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);
1135
1136	list_del_init(&stp->st_locks);
1137	nfs4_unhash_stid(&stp->st_stid);
1138	return unhash_ol_stateid(stp);
1139}
1140
1141static void release_lock_stateid(struct nfs4_ol_stateid *stp)
1142{
1143	struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
1144	bool unhashed;
1145
1146	spin_lock(&oo->oo_owner.so_client->cl_lock);
1147	unhashed = unhash_lock_stateid(stp);
1148	spin_unlock(&oo->oo_owner.so_client->cl_lock);
1149	if (unhashed)
1150		nfs4_put_stid(&stp->st_stid);
1151}
1152
1153static void unhash_lockowner_locked(struct nfs4_lockowner *lo)
1154{
1155	struct nfs4_client *clp = lo->lo_owner.so_client;
1156
1157	lockdep_assert_held(&clp->cl_lock);
1158
1159	list_del_init(&lo->lo_owner.so_strhash);
1160}
1161
1162/*
1163 * Free a list of generic stateids that were collected earlier after being
1164 * fully unhashed.
1165 */
1166static void
1167free_ol_stateid_reaplist(struct list_head *reaplist)
1168{
1169	struct nfs4_ol_stateid *stp;
1170	struct nfs4_file *fp;
1171
1172	might_sleep();
1173
1174	while (!list_empty(reaplist)) {
1175		stp = list_first_entry(reaplist, struct nfs4_ol_stateid,
1176				       st_locks);
1177		list_del(&stp->st_locks);
1178		fp = stp->st_stid.sc_file;
1179		stp->st_stid.sc_free(&stp->st_stid);
1180		if (fp)
1181			put_nfs4_file(fp);
1182	}
1183}
1184
1185static void release_lockowner(struct nfs4_lockowner *lo)
1186{
1187	struct nfs4_client *clp = lo->lo_owner.so_client;
1188	struct nfs4_ol_stateid *stp;
1189	struct list_head reaplist;
1190
1191	INIT_LIST_HEAD(&reaplist);
1192
1193	spin_lock(&clp->cl_lock);
1194	unhash_lockowner_locked(lo);
1195	while (!list_empty(&lo->lo_owner.so_stateids)) {
1196		stp = list_first_entry(&lo->lo_owner.so_stateids,
1197				struct nfs4_ol_stateid, st_perstateowner);
1198		WARN_ON(!unhash_lock_stateid(stp));
1199		put_ol_stateid_locked(stp, &reaplist);
1200	}
1201	spin_unlock(&clp->cl_lock);
1202	free_ol_stateid_reaplist(&reaplist);
1203	nfs4_put_stateowner(&lo->lo_owner);
1204}
1205
1206static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
1207				       struct list_head *reaplist)
1208{
1209	struct nfs4_ol_stateid *stp;
1210
1211	lockdep_assert_held(&open_stp->st_stid.sc_client->cl_lock);
1212
1213	while (!list_empty(&open_stp->st_locks)) {
1214		stp = list_entry(open_stp->st_locks.next,
1215				struct nfs4_ol_stateid, st_locks);
1216		WARN_ON(!unhash_lock_stateid(stp));
1217		put_ol_stateid_locked(stp, reaplist);
1218	}
1219}
1220
1221static bool unhash_open_stateid(struct nfs4_ol_stateid *stp,
1222				struct list_head *reaplist)
1223{
1224	bool unhashed;
1225
1226	lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);
1227
1228	unhashed = unhash_ol_stateid(stp);
1229	release_open_stateid_locks(stp, reaplist);
1230	return unhashed;
1231}
1232
1233static void release_open_stateid(struct nfs4_ol_stateid *stp)
1234{
1235	LIST_HEAD(reaplist);
1236
1237	spin_lock(&stp->st_stid.sc_client->cl_lock);
1238	if (unhash_open_stateid(stp, &reaplist))
1239		put_ol_stateid_locked(stp, &reaplist);
1240	spin_unlock(&stp->st_stid.sc_client->cl_lock);
1241	free_ol_stateid_reaplist(&reaplist);
1242}
1243
1244static void unhash_openowner_locked(struct nfs4_openowner *oo)
1245{
1246	struct nfs4_client *clp = oo->oo_owner.so_client;
1247
1248	lockdep_assert_held(&clp->cl_lock);
1249
1250	list_del_init(&oo->oo_owner.so_strhash);
1251	list_del_init(&oo->oo_perclient);
1252}
1253
1254static void release_last_closed_stateid(struct nfs4_openowner *oo)
1255{
1256	struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net,
1257					  nfsd_net_id);
1258	struct nfs4_ol_stateid *s;
1259
1260	spin_lock(&nn->client_lock);
1261	s = oo->oo_last_closed_stid;
1262	if (s) {
1263		list_del_init(&oo->oo_close_lru);
1264		oo->oo_last_closed_stid = NULL;
1265	}
1266	spin_unlock(&nn->client_lock);
1267	if (s)
1268		nfs4_put_stid(&s->st_stid);
1269}
1270
1271static void release_openowner(struct nfs4_openowner *oo)
1272{
1273	struct nfs4_ol_stateid *stp;
1274	struct nfs4_client *clp = oo->oo_owner.so_client;
1275	struct list_head reaplist;
1276
1277	INIT_LIST_HEAD(&reaplist);
1278
1279	spin_lock(&clp->cl_lock);
1280	unhash_openowner_locked(oo);
1281	while (!list_empty(&oo->oo_owner.so_stateids)) {
1282		stp = list_first_entry(&oo->oo_owner.so_stateids,
1283				struct nfs4_ol_stateid, st_perstateowner);
1284		if (unhash_open_stateid(stp, &reaplist))
1285			put_ol_stateid_locked(stp, &reaplist);
1286	}
1287	spin_unlock(&clp->cl_lock);
1288	free_ol_stateid_reaplist(&reaplist);
1289	release_last_closed_stateid(oo);
1290	nfs4_put_stateowner(&oo->oo_owner);
1291}
1292
1293static inline int
1294hash_sessionid(struct nfs4_sessionid *sessionid)
1295{
1296	struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid;
1297
1298	return sid->sequence % SESSION_HASH_SIZE;
1299}
1300
1301#ifdef CONFIG_SUNRPC_DEBUG
1302static inline void
1303dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
1304{
1305	u32 *ptr = (u32 *)(&sessionid->data[0]);
1306	dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]);
1307}
1308#else
1309static inline void
1310dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
1311{
1312}
1313#endif
1314
1315/*
1316 * Bump the seqid on cstate->replay_owner, and clear replay_owner if it
1317 * won't be used for replay.
1318 */
1319void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr)
1320{
1321	struct nfs4_stateowner *so = cstate->replay_owner;
1322
1323	if (nfserr == nfserr_replay_me)
1324		return;
1325
1326	if (!seqid_mutating_err(ntohl(nfserr))) {
1327		nfsd4_cstate_clear_replay(cstate);
1328		return;
1329	}
1330	if (!so)
1331		return;
1332	if (so->so_is_open_owner)
1333		release_last_closed_stateid(openowner(so));
1334	so->so_seqid++;
1335	return;
1336}
1337
1338static void
1339gen_sessionid(struct nfsd4_session *ses)
1340{
1341	struct nfs4_client *clp = ses->se_client;
1342	struct nfsd4_sessionid *sid;
1343
1344	sid = (struct nfsd4_sessionid *)ses->se_sessionid.data;
1345	sid->clientid = clp->cl_clientid;
1346	sid->sequence = current_sessionid++;
1347	sid->reserved = 0;
1348}
1349
1350/*
1351 * The protocol defines ca_maxresponssize_cached to include the size of
1352 * the rpc header, but all we need to cache is the data starting after
1353 * the end of the initial SEQUENCE operation--the rest we regenerate
1354 * each time.  Therefore we can advertise a ca_maxresponssize_cached
1355 * value that is the number of bytes in our cache plus a few additional
1356 * bytes.  In order to stay on the safe side, and not promise more than
1357 * we can cache, those additional bytes must be the minimum possible: 24
1358 * bytes of rpc header (xid through accept state, with AUTH_NULL
1359 * verifier), 12 for the compound header (with zero-length tag), and 44
1360 * for the SEQUENCE op response:
1361 */
1362#define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
1363
1364static void
1365free_session_slots(struct nfsd4_session *ses)
1366{
1367	int i;
1368
1369	for (i = 0; i < ses->se_fchannel.maxreqs; i++)
1370		kfree(ses->se_slots[i]);
1371}
1372
1373/*
1374 * We don't actually need to cache the rpc and session headers, so we
1375 * can allocate a little less for each slot:
1376 */
1377static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
1378{
1379	u32 size;
1380
1381	if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ)
1382		size = 0;
1383	else
1384		size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
1385	return size + sizeof(struct nfsd4_slot);
1386}
1387
1388/*
1389 * XXX: If we run out of reserved DRC memory we could (up to a point)
1390 * re-negotiate active sessions and reduce their slot usage to make
1391 * room for new connections. For now we just fail the create session.
1392 */
1393static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
1394{
1395	u32 slotsize = slot_bytes(ca);
1396	u32 num = ca->maxreqs;
1397	int avail;
1398
1399	spin_lock(&nfsd_drc_lock);
1400	avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
1401		    nfsd_drc_max_mem - nfsd_drc_mem_used);
1402	num = min_t(int, num, avail / slotsize);
1403	nfsd_drc_mem_used += num * slotsize;
1404	spin_unlock(&nfsd_drc_lock);
1405
1406	return num;
1407}
1408
1409static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
1410{
1411	int slotsize = slot_bytes(ca);
1412
1413	spin_lock(&nfsd_drc_lock);
1414	nfsd_drc_mem_used -= slotsize * ca->maxreqs;
1415	spin_unlock(&nfsd_drc_lock);
1416}
1417
1418static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
1419					   struct nfsd4_channel_attrs *battrs)
1420{
1421	int numslots = fattrs->maxreqs;
1422	int slotsize = slot_bytes(fattrs);
1423	struct nfsd4_session *new;
1424	int mem, i;
1425
1426	BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
1427			+ sizeof(struct nfsd4_session) > PAGE_SIZE);
1428	mem = numslots * sizeof(struct nfsd4_slot *);
1429
1430	new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
1431	if (!new)
1432		return NULL;
1433	/* allocate each struct nfsd4_slot and data cache in one piece */
1434	for (i = 0; i < numslots; i++) {
1435		new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
1436		if (!new->se_slots[i])
1437			goto out_free;
1438	}
1439
1440	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
1441	memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs));
1442
1443	return new;
1444out_free:
1445	while (i--)
1446		kfree(new->se_slots[i]);
1447	kfree(new);
1448	return NULL;
1449}
1450
1451static void free_conn(struct nfsd4_conn *c)
1452{
1453	svc_xprt_put(c->cn_xprt);
1454	kfree(c);
1455}
1456
1457static void nfsd4_conn_lost(struct svc_xpt_user *u)
1458{
1459	struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user);
1460	struct nfs4_client *clp = c->cn_session->se_client;
1461
1462	spin_lock(&clp->cl_lock);
1463	if (!list_empty(&c->cn_persession)) {
1464		list_del(&c->cn_persession);
1465		free_conn(c);
1466	}
1467	nfsd4_probe_callback(clp);
1468	spin_unlock(&clp->cl_lock);
1469}
1470
1471static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags)
1472{
1473	struct nfsd4_conn *conn;
1474
1475	conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL);
1476	if (!conn)
1477		return NULL;
1478	svc_xprt_get(rqstp->rq_xprt);
1479	conn->cn_xprt = rqstp->rq_xprt;
1480	conn->cn_flags = flags;
1481	INIT_LIST_HEAD(&conn->cn_xpt_user.list);
1482	return conn;
1483}
1484
1485static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
1486{
1487	conn->cn_session = ses;
1488	list_add(&conn->cn_persession, &ses->se_conns);
1489}
1490
1491static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
1492{
1493	struct nfs4_client *clp = ses->se_client;
1494
1495	spin_lock(&clp->cl_lock);
1496	__nfsd4_hash_conn(conn, ses);
1497	spin_unlock(&clp->cl_lock);
1498}
1499
1500static int nfsd4_register_conn(struct nfsd4_conn *conn)
1501{
1502	conn->cn_xpt_user.callback = nfsd4_conn_lost;
1503	return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
1504}
1505
1506static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, struct nfsd4_session *ses)
1507{
1508	int ret;
1509
1510	nfsd4_hash_conn(conn, ses);
1511	ret = nfsd4_register_conn(conn);
1512	if (ret)
1513		/* oops; xprt is already down: */
1514		nfsd4_conn_lost(&conn->cn_xpt_user);
1515	/* We may have gained or lost a callback channel: */
1516	nfsd4_probe_callback_sync(ses->se_client);
1517}
1518
1519static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses)
1520{
1521	u32 dir = NFS4_CDFC4_FORE;
1522
1523	if (cses->flags & SESSION4_BACK_CHAN)
1524		dir |= NFS4_CDFC4_BACK;
1525	return alloc_conn(rqstp, dir);
1526}
1527
1528/* must be called under client_lock */
1529static void nfsd4_del_conns(struct nfsd4_session *s)
1530{
1531	struct nfs4_client *clp = s->se_client;
1532	struct nfsd4_conn *c;
1533
1534	spin_lock(&clp->cl_lock);
1535	while (!list_empty(&s->se_conns)) {
1536		c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession);
1537		list_del_init(&c->cn_persession);
1538		spin_unlock(&clp->cl_lock);
1539
1540		unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user);
1541		free_conn(c);
1542
1543		spin_lock(&clp->cl_lock);
1544	}
1545	spin_unlock(&clp->cl_lock);
1546}
1547
1548static void __free_session(struct nfsd4_session *ses)
1549{
1550	free_session_slots(ses);
1551	kfree(ses);
1552}
1553
1554static void free_session(struct nfsd4_session *ses)
1555{
1556	nfsd4_del_conns(ses);
1557	nfsd4_put_drc_mem(&ses->se_fchannel);
1558	__free_session(ses);
1559}
1560
1561static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
1562{
1563	int idx;
1564	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
1565
1566	new->se_client = clp;
1567	gen_sessionid(new);
1568
1569	INIT_LIST_HEAD(&new->se_conns);
1570
1571	new->se_cb_seq_nr = 1;
1572	new->se_flags = cses->flags;
1573	new->se_cb_prog = cses->callback_prog;
1574	new->se_cb_sec = cses->cb_sec;
1575	atomic_set(&new->se_ref, 0);
1576	idx = hash_sessionid(&new->se_sessionid);
1577	list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
1578	spin_lock(&clp->cl_lock);
1579	list_add(&new->se_perclnt, &clp->cl_sessions);
1580	spin_unlock(&clp->cl_lock);
1581
1582	{
1583		struct sockaddr *sa = svc_addr(rqstp);
1584		/*
1585		 * This is a little silly; with sessions there's no real
1586		 * use for the callback address.  Use the peer address
1587		 * as a reasonable default for now, but consider fixing
1588		 * the rpc client not to require an address in the
1589		 * future:
1590		 */
1591		rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
1592		clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
1593	}
1594}
1595
1596/* caller must hold client_lock */
1597static struct nfsd4_session *
1598__find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
1599{
1600	struct nfsd4_session *elem;
1601	int idx;
1602	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1603
1604	lockdep_assert_held(&nn->client_lock);
1605
1606	dump_sessionid(__func__, sessionid);
1607	idx = hash_sessionid(sessionid);
1608	/* Search in the appropriate list */
1609	list_for_each_entry(elem, &nn->sessionid_hashtbl[idx], se_hash) {
1610		if (!memcmp(elem->se_sessionid.data, sessionid->data,
1611			    NFS4_MAX_SESSIONID_LEN)) {
1612			return elem;
1613		}
1614	}
1615
1616	dprintk("%s: session not found\n", __func__);
1617	return NULL;
1618}
1619
1620static struct nfsd4_session *
1621find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net,
1622		__be32 *ret)
1623{
1624	struct nfsd4_session *session;
1625	__be32 status = nfserr_badsession;
1626
1627	session = __find_in_sessionid_hashtbl(sessionid, net);
1628	if (!session)
1629		goto out;
1630	status = nfsd4_get_session_locked(session);
1631	if (status)
1632		session = NULL;
1633out:
1634	*ret = status;
1635	return session;
1636}
1637
1638/* caller must hold client_lock */
1639static void
1640unhash_session(struct nfsd4_session *ses)
1641{
1642	struct nfs4_client *clp = ses->se_client;
1643	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1644
1645	lockdep_assert_held(&nn->client_lock);
1646
1647	list_del(&ses->se_hash);
1648	spin_lock(&ses->se_client->cl_lock);
1649	list_del(&ses->se_perclnt);
1650	spin_unlock(&ses->se_client->cl_lock);
1651}
1652
1653/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
1654static int
1655STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
1656{
1657	/*
1658	 * We're assuming the clid was not given out from a boot
1659	 * precisely 2^32 (about 136 years) before this one.  That seems
1660	 * a safe assumption:
1661	 */
1662	if (clid->cl_boot == (u32)nn->boot_time)
1663		return 0;
1664	dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
1665		clid->cl_boot, clid->cl_id, nn->boot_time);
1666	return 1;
1667}
1668
1669/*
1670 * XXX Should we use a slab cache ?
1671 * This type of memory management is somewhat inefficient, but we use it
1672 * anyway since SETCLIENTID is not a common operation.
1673 */
1674static struct nfs4_client *alloc_client(struct xdr_netobj name)
1675{
1676	struct nfs4_client *clp;
1677	int i;
1678
1679	clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
1680	if (clp == NULL)
1681		return NULL;
1682	clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
1683	if (clp->cl_name.data == NULL)
1684		goto err_no_name;
1685	clp->cl_ownerstr_hashtbl = kmalloc(sizeof(struct list_head) *
1686			OWNER_HASH_SIZE, GFP_KERNEL);
1687	if (!clp->cl_ownerstr_hashtbl)
1688		goto err_no_hashtbl;
1689	for (i = 0; i < OWNER_HASH_SIZE; i++)
1690		INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
1691	clp->cl_name.len = name.len;
1692	INIT_LIST_HEAD(&clp->cl_sessions);
1693	idr_init(&clp->cl_stateids);
1694	atomic_set(&clp->cl_refcount, 0);
1695	clp->cl_cb_state = NFSD4_CB_UNKNOWN;
1696	INIT_LIST_HEAD(&clp->cl_idhash);
1697	INIT_LIST_HEAD(&clp->cl_openowners);
1698	INIT_LIST_HEAD(&clp->cl_delegations);
1699	INIT_LIST_HEAD(&clp->cl_lru);
1700	INIT_LIST_HEAD(&clp->cl_revoked);
1701#ifdef CONFIG_NFSD_PNFS
1702	INIT_LIST_HEAD(&clp->cl_lo_states);
1703#endif
1704	spin_lock_init(&clp->cl_lock);
1705	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
1706	return clp;
1707err_no_hashtbl:
1708	kfree(clp->cl_name.data);
1709err_no_name:
1710	kfree(clp);
1711	return NULL;
1712}
1713
1714static void
1715free_client(struct nfs4_client *clp)
1716{
1717	while (!list_empty(&clp->cl_sessions)) {
1718		struct nfsd4_session *ses;
1719		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
1720				se_perclnt);
1721		list_del(&ses->se_perclnt);
1722		WARN_ON_ONCE(atomic_read(&ses->se_ref));
1723		free_session(ses);
1724	}
1725	rpc_destroy_wait_queue(&clp->cl_cb_waitq);
1726	free_svc_cred(&clp->cl_cred);
1727	kfree(clp->cl_ownerstr_hashtbl);
1728	kfree(clp->cl_name.data);
1729	idr_destroy(&clp->cl_stateids);
1730	kfree(clp);
1731}
1732
1733/* must be called under the client_lock */
1734static void
1735unhash_client_locked(struct nfs4_client *clp)
1736{
1737	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1738	struct nfsd4_session *ses;
1739
1740	lockdep_assert_held(&nn->client_lock);
1741
1742	/* Mark the client as expired! */
1743	clp->cl_time = 0;
1744	/* Make it invisible */
1745	if (!list_empty(&clp->cl_idhash)) {
1746		list_del_init(&clp->cl_idhash);
1747		if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
1748			rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
1749		else
1750			rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
1751	}
1752	list_del_init(&clp->cl_lru);
1753	spin_lock(&clp->cl_lock);
1754	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
1755		list_del_init(&ses->se_hash);
1756	spin_unlock(&clp->cl_lock);
1757}
1758
1759static void
1760unhash_client(struct nfs4_client *clp)
1761{
1762	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1763
1764	spin_lock(&nn->client_lock);
1765	unhash_client_locked(clp);
1766	spin_unlock(&nn->client_lock);
1767}
1768
1769static __be32 mark_client_expired_locked(struct nfs4_client *clp)
1770{
1771	if (atomic_read(&clp->cl_refcount))
1772		return nfserr_jukebox;
1773	unhash_client_locked(clp);
1774	return nfs_ok;
1775}
1776
1777static void
1778__destroy_client(struct nfs4_client *clp)
1779{
1780	struct nfs4_openowner *oo;
1781	struct nfs4_delegation *dp;
1782	struct list_head reaplist;
1783
1784	INIT_LIST_HEAD(&reaplist);
1785	spin_lock(&state_lock);
1786	while (!list_empty(&clp->cl_delegations)) {
1787		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
1788		WARN_ON(!unhash_delegation_locked(dp));
1789		list_add(&dp->dl_recall_lru, &reaplist);
1790	}
1791	spin_unlock(&state_lock);
1792	while (!list_empty(&reaplist)) {
1793		dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
1794		list_del_init(&dp->dl_recall_lru);
1795		put_clnt_odstate(dp->dl_clnt_odstate);
1796		nfs4_put_deleg_lease(dp->dl_stid.sc_file);
1797		nfs4_put_stid(&dp->dl_stid);
1798	}
1799	while (!list_empty(&clp->cl_revoked)) {
1800		dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru);
1801		list_del_init(&dp->dl_recall_lru);
1802		nfs4_put_stid(&dp->dl_stid);
1803	}
1804	while (!list_empty(&clp->cl_openowners)) {
1805		oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);
1806		nfs4_get_stateowner(&oo->oo_owner);
1807		release_openowner(oo);
1808	}
1809	nfsd4_return_all_client_layouts(clp);
1810	nfsd4_shutdown_callback(clp);
1811	if (clp->cl_cb_conn.cb_xprt)
1812		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
1813	free_client(clp);
1814}
1815
1816static void
1817destroy_client(struct nfs4_client *clp)
1818{
1819	unhash_client(clp);
1820	__destroy_client(clp);
1821}
1822
1823static void expire_client(struct nfs4_client *clp)
1824{
1825	unhash_client(clp);
1826	nfsd4_client_record_remove(clp);
1827	__destroy_client(clp);
1828}
1829
1830static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
1831{
1832	memcpy(target->cl_verifier.data, source->data,
1833			sizeof(target->cl_verifier.data));
1834}
1835
1836static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
1837{
1838	target->cl_clientid.cl_boot = source->cl_clientid.cl_boot;
1839	target->cl_clientid.cl_id = source->cl_clientid.cl_id;
1840}
1841
1842static int copy_cred(struct svc_cred *target, struct svc_cred *source)
1843{
1844	if (source->cr_principal) {
1845		target->cr_principal =
1846				kstrdup(source->cr_principal, GFP_KERNEL);
1847		if (target->cr_principal == NULL)
1848			return -ENOMEM;
1849	} else
1850		target->cr_principal = NULL;
1851	target->cr_flavor = source->cr_flavor;
1852	target->cr_uid = source->cr_uid;
1853	target->cr_gid = source->cr_gid;
1854	target->cr_group_info = source->cr_group_info;
1855	get_group_info(target->cr_group_info);
1856	target->cr_gss_mech = source->cr_gss_mech;
1857	if (source->cr_gss_mech)
1858		gss_mech_get(source->cr_gss_mech);
1859	return 0;
1860}
1861
1862static int
1863compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2)
1864{
1865	if (o1->len < o2->len)
1866		return -1;
1867	if (o1->len > o2->len)
1868		return 1;
1869	return memcmp(o1->data, o2->data, o1->len);
1870}
1871
1872static int same_name(const char *n1, const char *n2)
1873{
1874	return 0 == memcmp(n1, n2, HEXDIR_LEN);
1875}
1876
1877static int
1878same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
1879{
1880	return 0 == memcmp(v1->data, v2->data, sizeof(v1->data));
1881}
1882
1883static int
1884same_clid(clientid_t *cl1, clientid_t *cl2)
1885{
1886	return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id);
1887}
1888
1889static bool groups_equal(struct group_info *g1, struct group_info *g2)
1890{
1891	int i;
1892
1893	if (g1->ngroups != g2->ngroups)
1894		return false;
1895	for (i=0; i<g1->ngroups; i++)
1896		if (!gid_eq(GROUP_AT(g1, i), GROUP_AT(g2, i)))
1897			return false;
1898	return true;
1899}
1900
1901/*
1902 * RFC 3530 language requires clid_inuse be returned when the
1903 * "principal" associated with a requests differs from that previously
1904 * used.  We use uid, gid's, and gss principal string as our best
1905 * approximation.  We also don't want to allow non-gss use of a client
1906 * established using gss: in theory cr_principal should catch that
1907 * change, but in practice cr_principal can be null even in the gss case
1908 * since gssd doesn't always pass down a principal string.
1909 */
1910static bool is_gss_cred(struct svc_cred *cr)
1911{
1912	/* Is cr_flavor one of the gss "pseudoflavors"?: */
1913	return (cr->cr_flavor > RPC_AUTH_MAXFLAVOR);
1914}
1915
1916
1917static bool
1918same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
1919{
1920	if ((is_gss_cred(cr1) != is_gss_cred(cr2))
1921		|| (!uid_eq(cr1->cr_uid, cr2->cr_uid))
1922		|| (!gid_eq(cr1->cr_gid, cr2->cr_gid))
1923		|| !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
1924		return false;
1925	if (cr1->cr_principal == cr2->cr_principal)
1926		return true;
1927	if (!cr1->cr_principal || !cr2->cr_principal)
1928		return false;
1929	return 0 == strcmp(cr1->cr_principal, cr2->cr_principal);
1930}
1931
1932static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp)
1933{
1934	struct svc_cred *cr = &rqstp->rq_cred;
1935	u32 service;
1936
1937	if (!cr->cr_gss_mech)
1938		return false;
1939	service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor);
1940	return service == RPC_GSS_SVC_INTEGRITY ||
1941	       service == RPC_GSS_SVC_PRIVACY;
1942}
1943
1944static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
1945{
1946	struct svc_cred *cr = &rqstp->rq_cred;
1947
1948	if (!cl->cl_mach_cred)
1949		return true;
1950	if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech)
1951		return false;
1952	if (!svc_rqst_integrity_protected(rqstp))
1953		return false;
1954	if (!cr->cr_principal)
1955		return false;
1956	return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal);
1957}
1958
1959static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
1960{
1961	__be32 verf[2];
1962
1963	/*
1964	 * This is opaque to client, so no need to byte-swap. Use
1965	 * __force to keep sparse happy
1966	 */
1967	verf[0] = (__force __be32)get_seconds();
1968	verf[1] = (__force __be32)nn->clientid_counter;
1969	memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
1970}
1971
1972static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
1973{
1974	clp->cl_clientid.cl_boot = nn->boot_time;
1975	clp->cl_clientid.cl_id = nn->clientid_counter++;
1976	gen_confirm(clp, nn);
1977}
1978
1979static struct nfs4_stid *
1980find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
1981{
1982	struct nfs4_stid *ret;
1983
1984	ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id);
1985	if (!ret || !ret->sc_type)
1986		return NULL;
1987	return ret;
1988}
1989
1990static struct nfs4_stid *
1991find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
1992{
1993	struct nfs4_stid *s;
1994
1995	spin_lock(&cl->cl_lock);
1996	s = find_stateid_locked(cl, t);
1997	if (s != NULL) {
1998		if (typemask & s->sc_type)
1999			atomic_inc(&s->sc_count);
2000		else
2001			s = NULL;
2002	}
2003	spin_unlock(&cl->cl_lock);
2004	return s;
2005}
2006
2007static struct nfs4_client *create_client(struct xdr_netobj name,
2008		struct svc_rqst *rqstp, nfs4_verifier *verf)
2009{
2010	struct nfs4_client *clp;
2011	struct sockaddr *sa = svc_addr(rqstp);
2012	int ret;
2013	struct net *net = SVC_NET(rqstp);
2014
2015	clp = alloc_client(name);
2016	if (clp == NULL)
2017		return NULL;
2018
2019	ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
2020	if (ret) {
2021		free_client(clp);
2022		return NULL;
2023	}
2024	nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
2025	clp->cl_time = get_seconds();
2026	clear_bit(0, &clp->cl_cb_slot_busy);
2027	copy_verf(clp, verf);
2028	rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
2029	clp->cl_cb_session = NULL;
2030	clp->net = net;
2031	return clp;
2032}
2033
2034static void
2035add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root)
2036{
2037	struct rb_node **new = &(root->rb_node), *parent = NULL;
2038	struct nfs4_client *clp;
2039
2040	while (*new) {
2041		clp = rb_entry(*new, struct nfs4_client, cl_namenode);
2042		parent = *new;
2043
2044		if (compare_blob(&clp->cl_name, &new_clp->cl_name) > 0)
2045			new = &((*new)->rb_left);
2046		else
2047			new = &((*new)->rb_right);
2048	}
2049
2050	rb_link_node(&new_clp->cl_namenode, parent, new);
2051	rb_insert_color(&new_clp->cl_namenode, root);
2052}
2053
2054static struct nfs4_client *
2055find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root)
2056{
2057	int cmp;
2058	struct rb_node *node = root->rb_node;
2059	struct nfs4_client *clp;
2060
2061	while (node) {
2062		clp = rb_entry(node, struct nfs4_client, cl_namenode);
2063		cmp = compare_blob(&clp->cl_name, name);
2064		if (cmp > 0)
2065			node = node->rb_left;
2066		else if (cmp < 0)
2067			node = node->rb_right;
2068		else
2069			return clp;
2070	}
2071	return NULL;
2072}
2073
2074static void
2075add_to_unconfirmed(struct nfs4_client *clp)
2076{
2077	unsigned int idhashval;
2078	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
2079
2080	lockdep_assert_held(&nn->client_lock);
2081
2082	clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
2083	add_clp_to_name_tree(clp, &nn->unconf_name_tree);
2084	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
2085	list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]);
2086	renew_client_locked(clp);
2087}
2088
2089static void
2090move_to_confirmed(struct nfs4_client *clp)
2091{
2092	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
2093	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
2094
2095	lockdep_assert_held(&nn->client_lock);
2096
2097	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
2098	list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
2099	rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
2100	add_clp_to_name_tree(clp, &nn->conf_name_tree);
2101	set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
2102	renew_client_locked(clp);
2103}
2104
2105static struct nfs4_client *
2106find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions)
2107{
2108	struct nfs4_client *clp;
2109	unsigned int idhashval = clientid_hashval(clid->cl_id);
2110
2111	list_for_each_entry(clp, &tbl[idhashval], cl_idhash) {
2112		if (same_clid(&clp->cl_clientid, clid)) {
2113			if ((bool)clp->cl_minorversion != sessions)
2114				return NULL;
2115			renew_client_locked(clp);
2116			return clp;
2117		}
2118	}
2119	return NULL;
2120}
2121
2122static struct nfs4_client *
2123find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
2124{
2125	struct list_head *tbl = nn->conf_id_hashtbl;
2126
2127	lockdep_assert_held(&nn->client_lock);
2128	return find_client_in_id_table(tbl, clid, sessions);
2129}
2130
2131static struct nfs4_client *
2132find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
2133{
2134	struct list_head *tbl = nn->unconf_id_hashtbl;
2135
2136	lockdep_assert_held(&nn->client_lock);
2137	return find_client_in_id_table(tbl, clid, sessions);
2138}
2139
2140static bool clp_used_exchangeid(struct nfs4_client *clp)
2141{
2142	return clp->cl_exchange_flags != 0;
2143}
2144
2145static struct nfs4_client *
2146find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
2147{
2148	lockdep_assert_held(&nn->client_lock);
2149	return find_clp_in_name_tree(name, &nn->conf_name_tree);
2150}
2151
2152static struct nfs4_client *
2153find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
2154{
2155	lockdep_assert_held(&nn->client_lock);
2156	return find_clp_in_name_tree(name, &nn->unconf_name_tree);
2157}
2158
2159static void
2160gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp)
2161{
2162	struct nfs4_cb_conn *conn = &clp->cl_cb_conn;
2163	struct sockaddr	*sa = svc_addr(rqstp);
2164	u32 scopeid = rpc_get_scope_id(sa);
2165	unsigned short expected_family;
2166
2167	/* Currently, we only support tcp and tcp6 for the callback channel */
2168	if (se->se_callback_netid_len == 3 &&
2169	    !memcmp(se->se_callback_netid_val, "tcp", 3))
2170		expected_family = AF_INET;
2171	else if (se->se_callback_netid_len == 4 &&
2172		 !memcmp(se->se_callback_netid_val, "tcp6", 4))
2173		expected_family = AF_INET6;
2174	else
2175		goto out_err;
2176
2177	conn->cb_addrlen = rpc_uaddr2sockaddr(clp->net, se->se_callback_addr_val,
2178					    se->se_callback_addr_len,
2179					    (struct sockaddr *)&conn->cb_addr,
2180					    sizeof(conn->cb_addr));
2181
2182	if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family)
2183		goto out_err;
2184
2185	if (conn->cb_addr.ss_family == AF_INET6)
2186		((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid;
2187
2188	conn->cb_prog = se->se_callback_prog;
2189	conn->cb_ident = se->se_callback_ident;
2190	memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen);
2191	return;
2192out_err:
2193	conn->cb_addr.ss_family = AF_UNSPEC;
2194	conn->cb_addrlen = 0;
2195	dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
2196		"will not receive delegations\n",
2197		clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
2198
2199	return;
2200}
2201
2202/*
2203 * Cache a reply. nfsd4_check_resp_size() has bounded the cache size.
2204 */
2205static void
2206nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
2207{
2208	struct xdr_buf *buf = resp->xdr.buf;
2209	struct nfsd4_slot *slot = resp->cstate.slot;
2210	unsigned int base;
2211
2212	dprintk("--> %s slot %p\n", __func__, slot);
2213
2214	slot->sl_opcnt = resp->opcnt;
2215	slot->sl_status = resp->cstate.status;
2216
2217	slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
2218	if (nfsd4_not_cached(resp)) {
2219		slot->sl_datalen = 0;
2220		return;
2221	}
2222	base = resp->cstate.data_offset;
2223	slot->sl_datalen = buf->len - base;
2224	if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
2225		WARN("%s: sessions DRC could not cache compound\n", __func__);
2226	return;
2227}
2228
2229/*
2230 * Encode the replay sequence operation from the slot values.
2231 * If cachethis is FALSE encode the uncached rep error on the next
2232 * operation which sets resp->p and increments resp->opcnt for
2233 * nfs4svc_encode_compoundres.
2234 *
2235 */
2236static __be32
2237nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
2238			  struct nfsd4_compoundres *resp)
2239{
2240	struct nfsd4_op *op;
2241	struct nfsd4_slot *slot = resp->cstate.slot;
2242
2243	/* Encode the replayed sequence operation */
2244	op = &args->ops[resp->opcnt - 1];
2245	nfsd4_encode_operation(resp, op);
2246
2247	/* Return nfserr_retry_uncached_rep in next operation. */
2248	if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
2249		op = &args->ops[resp->opcnt++];
2250		op->status = nfserr_retry_uncached_rep;
2251		nfsd4_encode_operation(resp, op);
2252	}
2253	return op->status;
2254}
2255
2256/*
2257 * The sequence operation is not cached because we can use the slot and
2258 * session values.
2259 */
2260static __be32
2261nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
2262			 struct nfsd4_sequence *seq)
2263{
2264	struct nfsd4_slot *slot = resp->cstate.slot;
2265	struct xdr_stream *xdr = &resp->xdr;
2266	__be32 *p;
2267	__be32 status;
2268
2269	dprintk("--> %s slot %p\n", __func__, slot);
2270
2271	status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
2272	if (status)
2273		return status;
2274
2275	p = xdr_reserve_space(xdr, slot->sl_datalen);
2276	if (!p) {
2277		WARN_ON_ONCE(1);
2278		return nfserr_serverfault;
2279	}
2280	xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen);
2281	xdr_commit_encode(xdr);
2282
2283	resp->opcnt = slot->sl_opcnt;
2284	return slot->sl_status;
2285}
2286
2287/*
2288 * Set the exchange_id flags returned by the server.
2289 */
2290static void
2291nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
2292{
2293#ifdef CONFIG_NFSD_PNFS
2294	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS;
2295#else
2296	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
2297#endif
2298
2299	/* Referrals are supported, Migration is not. */
2300	new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
2301
2302	/* set the wire flags to return to client. */
2303	clid->flags = new->cl_exchange_flags;
2304}
2305
2306static bool client_has_state(struct nfs4_client *clp)
2307{
2308	/*
2309	 * Note clp->cl_openowners check isn't quite right: there's no
2310	 * need to count owners without stateid's.
2311	 *
2312	 * Also note we should probably be using this in 4.0 case too.
2313	 */
2314	return !list_empty(&clp->cl_openowners)
2315		|| !list_empty(&clp->cl_delegations)
2316		|| !list_empty(&clp->cl_sessions);
2317}
2318
2319__be32
2320nfsd4_exchange_id(struct svc_rqst *rqstp,
2321		  struct nfsd4_compound_state *cstate,
2322		  struct nfsd4_exchange_id *exid)
2323{
2324	struct nfs4_client *conf, *new;
2325	struct nfs4_client *unconf = NULL;
2326	__be32 status;
2327	char			addr_str[INET6_ADDRSTRLEN];
2328	nfs4_verifier		verf = exid->verifier;
2329	struct sockaddr		*sa = svc_addr(rqstp);
2330	bool	update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A;
2331	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
2332
2333	rpc_ntop(sa, addr_str, sizeof(addr_str));
2334	dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
2335		"ip_addr=%s flags %x, spa_how %d\n",
2336		__func__, rqstp, exid, exid->clname.len, exid->clname.data,
2337		addr_str, exid->flags, exid->spa_how);
2338
2339	if (exid->flags & ~EXCHGID4_FLAG_MASK_A)
2340		return nfserr_inval;
2341
2342	switch (exid->spa_how) {
2343	case SP4_MACH_CRED:
2344		if (!svc_rqst_integrity_protected(rqstp))
2345			return nfserr_inval;
2346	case SP4_NONE:
2347		break;
2348	default:				/* checked by xdr code */
2349		WARN_ON_ONCE(1);
2350	case SP4_SSV:
2351		return nfserr_encr_alg_unsupp;
2352	}
2353
2354	new = create_client(exid->clname, rqstp, &verf);
2355	if (new == NULL)
2356		return nfserr_jukebox;
2357
2358	/* Cases below refer to rfc 5661 section 18.35.4: */
2359	spin_lock(&nn->client_lock);
2360	conf = find_confirmed_client_by_name(&exid->clname, nn);
2361	if (conf) {
2362		bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred);
2363		bool verfs_match = same_verf(&verf, &conf->cl_verifier);
2364
2365		if (update) {
2366			if (!clp_used_exchangeid(conf)) { /* buggy client */
2367				status = nfserr_inval;
2368				goto out;
2369			}
2370			if (!mach_creds_match(conf, rqstp)) {
2371				status = nfserr_wrong_cred;
2372				goto out;
2373			}
2374			if (!creds_match) { /* case 9 */
2375				status = nfserr_perm;
2376				goto out;
2377			}
2378			if (!verfs_match) { /* case 8 */
2379				status = nfserr_not_same;
2380				goto out;
2381			}
2382			/* case 6 */
2383			exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
2384			goto out_copy;
2385		}
2386		if (!creds_match) { /* case 3 */
2387			if (client_has_state(conf)) {
2388				status = nfserr_clid_inuse;
2389				goto out;
2390			}
2391			goto out_new;
2392		}
2393		if (verfs_match) { /* case 2 */
2394			conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
2395			goto out_copy;
2396		}
2397		/* case 5, client reboot */
2398		conf = NULL;
2399		goto out_new;
2400	}
2401
2402	if (update) { /* case 7 */
2403		status = nfserr_noent;
2404		goto out;
2405	}
2406
2407	unconf  = find_unconfirmed_client_by_name(&exid->clname, nn);
2408	if (unconf) /* case 4, possible retry or client restart */
2409		unhash_client_locked(unconf);
2410
2411	/* case 1 (normal case) */
2412out_new:
2413	if (conf) {
2414		status = mark_client_expired_locked(conf);
2415		if (status)
2416			goto out;
2417	}
2418	new->cl_minorversion = cstate->minorversion;
2419	new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED);
2420
2421	gen_clid(new, nn);
2422	add_to_unconfirmed(new);
2423	swap(new, conf);
2424out_copy:
2425	exid->clientid.cl_boot = conf->cl_clientid.cl_boot;
2426	exid->clientid.cl_id = conf->cl_clientid.cl_id;
2427
2428	exid->seqid = conf->cl_cs_slot.sl_seqid + 1;
2429	nfsd4_set_ex_flags(conf, exid);
2430
2431	dprintk("nfsd4_exchange_id seqid %d flags %x\n",
2432		conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags);
2433	status = nfs_ok;
2434
2435out:
2436	spin_unlock(&nn->client_lock);
2437	if (new)
2438		expire_client(new);
2439	if (unconf)
2440		expire_client(unconf);
2441	return status;
2442}
2443
2444static __be32
2445check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
2446{
2447	dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid,
2448		slot_seqid);
2449
2450	/* The slot is in use, and no response has been sent. */
2451	if (slot_inuse) {
2452		if (seqid == slot_seqid)
2453			return nfserr_jukebox;
2454		else
2455			return nfserr_seq_misordered;
2456	}
2457	/* Note unsigned 32-bit arithmetic handles wraparound: */
2458	if (likely(seqid == slot_seqid + 1))
2459		return nfs_ok;
2460	if (seqid == slot_seqid)
2461		return nfserr_replay_cache;
2462	return nfserr_seq_misordered;
2463}
2464
2465/*
2466 * Cache the create session result into the create session single DRC
2467 * slot cache by saving the xdr structure. sl_seqid has been set.
2468 * Do this for solo or embedded create session operations.
2469 */
2470static void
2471nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses,
2472			   struct nfsd4_clid_slot *slot, __be32 nfserr)
2473{
2474	slot->sl_status = nfserr;
2475	memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses));
2476}
2477
2478static __be32
2479nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses,
2480			    struct nfsd4_clid_slot *slot)
2481{
2482	memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses));
2483	return slot->sl_status;
2484}
2485
2486#define NFSD_MIN_REQ_HDR_SEQ_SZ	((\
2487			2 * 2 + /* credential,verifier: AUTH_NULL, length 0 */ \
2488			1 +	/* MIN tag is length with zero, only length */ \
2489			3 +	/* version, opcount, opcode */ \
2490			XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
2491				/* seqid, slotID, slotID, cache */ \
2492			4 ) * sizeof(__be32))
2493
2494#define NFSD_MIN_RESP_HDR_SEQ_SZ ((\
2495			2 +	/* verifier: AUTH_NULL, length 0 */\
2496			1 +	/* status */ \
2497			1 +	/* MIN tag is length with zero, only length */ \
2498			3 +	/* opcount, opcode, opstatus*/ \
2499			XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
2500				/* seqid, slotID, slotID, slotID, status */ \
2501			5 ) * sizeof(__be32))
2502
2503static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
2504{
2505	u32 maxrpc = nn->nfsd_serv->sv_max_mesg;
2506
2507	if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ)
2508		return nfserr_toosmall;
2509	if (ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ)
2510		return nfserr_toosmall;
2511	ca->headerpadsz = 0;
2512	ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc);
2513	ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc);
2514	ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND);
2515	ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
2516			NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
2517	ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
2518	/*
2519	 * Note decreasing slot size below client's request may make it
2520	 * difficult for client to function correctly, whereas
2521	 * decreasing the number of slots will (just?) affect
2522	 * performance.  When short on memory we therefore prefer to
2523	 * decrease number of slots instead of their size.  Clients that
2524	 * request larger slots than they need will get poor results:
2525	 */
2526	ca->maxreqs = nfsd4_get_drc_mem(ca);
2527	if (!ca->maxreqs)
2528		return nfserr_jukebox;
2529
2530	return nfs_ok;
2531}
2532
2533#define NFSD_CB_MAX_REQ_SZ	((NFS4_enc_cb_recall_sz + \
2534				 RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32))
2535#define NFSD_CB_MAX_RESP_SZ	((NFS4_dec_cb_recall_sz + \
2536				 RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32))
2537
2538static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
2539{
2540	ca->headerpadsz = 0;
2541
2542	/*
2543	 * These RPC_MAX_HEADER macros are overkill, especially since we
2544	 * don't even do gss on the backchannel yet.  But this is still
2545	 * less than 1k.  Tighten up this estimate in the unlikely event
2546	 * it turns out to be a problem for some client:
2547	 */
2548	if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ)
2549		return nfserr_toosmall;
2550	if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ)
2551		return nfserr_toosmall;
2552	ca->maxresp_cached = 0;
2553	if (ca->maxops < 2)
2554		return nfserr_toosmall;
2555
2556	return nfs_ok;
2557}
2558
2559static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs)
2560{
2561	switch (cbs->flavor) {
2562	case RPC_AUTH_NULL:
2563	case RPC_AUTH_UNIX:
2564		return nfs_ok;
2565	default:
2566		/*
2567		 * GSS case: the spec doesn't allow us to return this
2568		 * error.  But it also doesn't allow us not to support
2569		 * GSS.
2570		 * I'd rather this fail hard than return some error the
2571		 * client might think it can already handle:
2572		 */
2573		return nfserr_encr_alg_unsupp;
2574	}
2575}
2576
2577__be32
2578nfsd4_create_session(struct svc_rqst *rqstp,
2579		     struct nfsd4_compound_state *cstate,
2580		     struct nfsd4_create_session *cr_ses)
2581{
2582	struct sockaddr *sa = svc_addr(rqstp);
2583	struct nfs4_client *conf, *unconf;
2584	struct nfs4_client *old = NULL;
2585	struct nfsd4_session *new;
2586	struct nfsd4_conn *conn;
2587	struct nfsd4_clid_slot *cs_slot = NULL;
2588	__be32 status = 0;
2589	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
2590
2591	if (cr_ses->flags & ~SESSION4_FLAG_MASK_A)
2592		return nfserr_inval;
2593	status = nfsd4_check_cb_sec(&cr_ses->cb_sec);
2594	if (status)
2595		return status;
2596	status = check_forechannel_attrs(&cr_ses->fore_channel, nn);
2597	if (status)
2598		return status;
2599	status = check_backchannel_attrs(&cr_ses->back_channel);
2600	if (status)
2601		goto out_release_drc_mem;
2602	status = nfserr_jukebox;
2603	new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
2604	if (!new)
2605		goto out_release_drc_mem;
2606	conn = alloc_conn_from_crses(rqstp, cr_ses);
2607	if (!conn)
2608		goto out_free_session;
2609
2610	spin_lock(&nn->client_lock);
2611	unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
2612	conf = find_confirmed_client(&cr_ses->clientid, true, nn);
2613	WARN_ON_ONCE(conf && unconf);
2614
2615	if (conf) {
2616		status = nfserr_wrong_cred;
2617		if (!mach_creds_match(conf, rqstp))
2618			goto out_free_conn;
2619		cs_slot = &conf->cl_cs_slot;
2620		status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
2621		if (status == nfserr_replay_cache) {
2622			status = nfsd4_replay_create_session(cr_ses, cs_slot);
2623			goto out_free_conn;
2624		} else if (cr_ses->seqid != cs_slot->sl_seqid + 1) {
2625			status = nfserr_seq_misordered;
2626			goto out_free_conn;
2627		}
2628	} else if (unconf) {
2629		if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
2630		    !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
2631			status = nfserr_clid_inuse;
2632			goto out_free_conn;
2633		}
2634		status = nfserr_wrong_cred;
2635		if (!mach_creds_match(unconf, rqstp))
2636			goto out_free_conn;
2637		cs_slot = &unconf->cl_cs_slot;
2638		status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
2639		if (status) {
2640			/* an unconfirmed replay returns misordered */
2641			status = nfserr_seq_misordered;
2642			goto out_free_conn;
2643		}
2644		old = find_confirmed_client_by_name(&unconf->cl_name, nn);
2645		if (old) {
2646			status = mark_client_expired_locked(old);
2647			if (status) {
2648				old = NULL;
2649				goto out_free_conn;
2650			}
2651		}
2652		move_to_confirmed(unconf);
2653		conf = unconf;
2654	} else {
2655		status = nfserr_stale_clientid;
2656		goto out_free_conn;
2657	}
2658	status = nfs_ok;
2659	/*
2660	 * We do not support RDMA or persistent sessions
2661	 */
2662	cr_ses->flags &= ~SESSION4_PERSIST;
2663	cr_ses->flags &= ~SESSION4_RDMA;
2664
2665	init_session(rqstp, new, conf, cr_ses);
2666	nfsd4_get_session_locked(new);
2667
2668	memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
2669	       NFS4_MAX_SESSIONID_LEN);
2670	cs_slot->sl_seqid++;
2671	cr_ses->seqid = cs_slot->sl_seqid;
2672
2673	/* cache solo and embedded create sessions under the client_lock */
2674	nfsd4_cache_create_session(cr_ses, cs_slot, status);
2675	spin_unlock(&nn->client_lock);
2676	/* init connection and backchannel */
2677	nfsd4_init_conn(rqstp, conn, new);
2678	nfsd4_put_session(new);
2679	if (old)
2680		expire_client(old);
2681	return status;
2682out_free_conn:
2683	spin_unlock(&nn->client_lock);
2684	free_conn(conn);
2685	if (old)
2686		expire_client(old);
2687out_free_session:
2688	__free_session(new);
2689out_release_drc_mem:
2690	nfsd4_put_drc_mem(&cr_ses->fore_channel);
2691	return status;
2692}
2693
2694static __be32 nfsd4_map_bcts_dir(u32 *dir)
2695{
2696	switch (*dir) {
2697	case NFS4_CDFC4_FORE:
2698	case NFS4_CDFC4_BACK:
2699		return nfs_ok;
2700	case NFS4_CDFC4_FORE_OR_BOTH:
2701	case NFS4_CDFC4_BACK_OR_BOTH:
2702		*dir = NFS4_CDFC4_BOTH;
2703		return nfs_ok;
2704	};
2705	return nfserr_inval;
2706}
2707
2708__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_backchannel_ctl *bc)
2709{
2710	struct nfsd4_session *session = cstate->session;
2711	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
2712	__be32 status;
2713
2714	status = nfsd4_check_cb_sec(&bc->bc_cb_sec);
2715	if (status)
2716		return status;
2717	spin_lock(&nn->client_lock);
2718	session->se_cb_prog = bc->bc_cb_program;
2719	session->se_cb_sec = bc->bc_cb_sec;
2720	spin_unlock(&nn->client_lock);
2721
2722	nfsd4_probe_callback(session->se_client);
2723
2724	return nfs_ok;
2725}
2726
2727__be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
2728		     struct nfsd4_compound_state *cstate,
2729		     struct nfsd4_bind_conn_to_session *bcts)
2730{
2731	__be32 status;
2732	struct nfsd4_conn *conn;
2733	struct nfsd4_session *session;
2734	struct net *net = SVC_NET(rqstp);
2735	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
2736
2737	if (!nfsd4_last_compound_op(rqstp))
2738		return nfserr_not_only_op;
2739	spin_lock(&nn->client_lock);
2740	session = find_in_sessionid_hashtbl(&bcts->sessionid, net, &status);
2741	spin_unlock(&nn->client_lock);
2742	if (!session)
2743		goto out_no_session;
2744	status = nfserr_wrong_cred;
2745	if (!mach_creds_match(session->se_client, rqstp))
2746		goto out;
2747	status = nfsd4_map_bcts_dir(&bcts->dir);
2748	if (status)
2749		goto out;
2750	conn = alloc_conn(rqstp, bcts->dir);
2751	status = nfserr_jukebox;
2752	if (!conn)
2753		goto out;
2754	nfsd4_init_conn(rqstp, conn, session);
2755	status = nfs_ok;
2756out:
2757	nfsd4_put_session(session);
2758out_no_session:
2759	return status;
2760}
2761
2762static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
2763{
2764	if (!session)
2765		return 0;
2766	return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
2767}
2768
2769__be32
2770nfsd4_destroy_session(struct svc_rqst *r,
2771		      struct nfsd4_compound_state *cstate,
2772		      struct nfsd4_destroy_session *sessionid)
2773{
2774	struct nfsd4_session *ses;
2775	__be32 status;
2776	int ref_held_by_me = 0;
2777	struct net *net = SVC_NET(r);
2778	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
2779
2780	status = nfserr_not_only_op;
2781	if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
2782		if (!nfsd4_last_compound_op(r))
2783			goto out;
2784		ref_held_by_me++;
2785	}
2786	dump_sessionid(__func__, &sessionid->sessionid);
2787	spin_lock(&nn->client_lock);
2788	ses = find_in_sessionid_hashtbl(&sessionid->sessionid, net, &status);
2789	if (!ses)
2790		goto out_client_lock;
2791	status = nfserr_wrong_cred;
2792	if (!mach_creds_match(ses->se_client, r))
2793		goto out_put_session;
2794	status = mark_session_dead_locked(ses, 1 + ref_held_by_me);
2795	if (status)
2796		goto out_put_session;
2797	unhash_session(ses);
2798	spin_unlock(&nn->client_lock);
2799
2800	nfsd4_probe_callback_sync(ses->se_client);
2801
2802	spin_lock(&nn->client_lock);
2803	status = nfs_ok;
2804out_put_session:
2805	nfsd4_put_session_locked(ses);
2806out_client_lock:
2807	spin_unlock(&nn->client_lock);
2808out:
2809	return status;
2810}
2811
2812static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
2813{
2814	struct nfsd4_conn *c;
2815
2816	list_for_each_entry(c, &s->se_conns, cn_persession) {
2817		if (c->cn_xprt == xpt) {
2818			return c;
2819		}
2820	}
2821	return NULL;
2822}
2823
2824static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
2825{
2826	struct nfs4_client *clp = ses->se_client;
2827	struct nfsd4_conn *c;
2828	__be32 status = nfs_ok;
2829	int ret;
2830
2831	spin_lock(&clp->cl_lock);
2832	c = __nfsd4_find_conn(new->cn_xprt, ses);
2833	if (c)
2834		goto out_free;
2835	status = nfserr_conn_not_bound_to_session;
2836	if (clp->cl_mach_cred)
2837		goto out_free;
2838	__nfsd4_hash_conn(new, ses);
2839	spin_unlock(&clp->cl_lock);
2840	ret = nfsd4_register_conn(new);
2841	if (ret)
2842		/* oops; xprt is already down: */
2843		nfsd4_conn_lost(&new->cn_xpt_user);
2844	return nfs_ok;
2845out_free:
2846	spin_unlock(&clp->cl_lock);
2847	free_conn(new);
2848	return status;
2849}
2850
2851static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session)
2852{
2853	struct nfsd4_compoundargs *args = rqstp->rq_argp;
2854
2855	return args->opcnt > session->se_fchannel.maxops;
2856}
2857
2858static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
2859				  struct nfsd4_session *session)
2860{
2861	struct xdr_buf *xb = &rqstp->rq_arg;
2862
2863	return xb->len > session->se_fchannel.maxreq_sz;
2864}
2865
2866__be32
2867nfsd4_sequence(struct svc_rqst *rqstp,
2868	       struct nfsd4_compound_state *cstate,
2869	       struct nfsd4_sequence *seq)
2870{
2871	struct nfsd4_compoundres *resp = rqstp->rq_resp;
2872	struct xdr_stream *xdr = &resp->xdr;
2873	struct nfsd4_session *session;
2874	struct nfs4_client *clp;
2875	struct nfsd4_slot *slot;
2876	struct nfsd4_conn *conn;
2877	__be32 status;
2878	int buflen;
2879	struct net *net = SVC_NET(rqstp);
2880	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
2881
2882	if (resp->opcnt != 1)
2883		return nfserr_sequence_pos;
2884
2885	/*
2886	 * Will be either used or freed by nfsd4_sequence_check_conn
2887	 * below.
2888	 */
2889	conn = alloc_conn(rqstp, NFS4_CDFC4_FORE);
2890	if (!conn)
2891		return nfserr_jukebox;
2892
2893	spin_lock(&nn->client_lock);
2894	session = find_in_sessionid_hashtbl(&seq->sessionid, net, &status);
2895	if (!session)
2896		goto out_no_session;
2897	clp = session->se_client;
2898
2899	status = nfserr_too_many_ops;
2900	if (nfsd4_session_too_many_ops(rqstp, session))
2901		goto out_put_session;
2902
2903	status = nfserr_req_too_big;
2904	if (nfsd4_request_too_big(rqstp, session))
2905		goto out_put_session;
2906
2907	status = nfserr_badslot;
2908	if (seq->slotid >= session->se_fchannel.maxreqs)
2909		goto out_put_session;
2910
2911	slot = session->se_slots[seq->slotid];
2912	dprintk("%s: slotid %d\n", __func__, seq->slotid);
2913
2914	/* We do not negotiate the number of slots yet, so set the
2915	 * maxslots to the session maxreqs which is used to encode
2916	 * sr_highest_slotid and the sr_target_slot id to maxslots */
2917	seq->maxslots = session->se_fchannel.maxreqs;
2918
2919	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
2920					slot->sl_flags & NFSD4_SLOT_INUSE);
2921	if (status == nfserr_replay_cache) {
2922		status = nfserr_seq_misordered;
2923		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
2924			goto out_put_session;
2925		cstate->slot = slot;
2926		cstate->session = session;
2927		cstate->clp = clp;
2928		/* Return the cached reply status and set cstate->status
2929		 * for nfsd4_proc_compound processing */
2930		status = nfsd4_replay_cache_entry(resp, seq);
2931		cstate->status = nfserr_replay_cache;
2932		goto out;
2933	}
2934	if (status)
2935		goto out_put_session;
2936
2937	status = nfsd4_sequence_check_conn(conn, session);
2938	conn = NULL;
2939	if (status)
2940		goto out_put_session;
2941
2942	buflen = (seq->cachethis) ?
2943			session->se_fchannel.maxresp_cached :
2944			session->se_fchannel.maxresp_sz;
2945	status = (seq->cachethis) ? nfserr_rep_too_big_to_cache :
2946				    nfserr_rep_too_big;
2947	if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack))
2948		goto out_put_session;
2949	svc_reserve(rqstp, buflen);
2950
2951	status = nfs_ok;
2952	/* Success! bump slot seqid */
2953	slot->sl_seqid = seq->seqid;
2954	slot->sl_flags |= NFSD4_SLOT_INUSE;
2955	if (seq->cachethis)
2956		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
2957	else
2958		slot->sl_flags &= ~NFSD4_SLOT_CACHETHIS;
2959
2960	cstate->slot = slot;
2961	cstate->session = session;
2962	cstate->clp = clp;
2963
2964out:
2965	switch (clp->cl_cb_state) {
2966	case NFSD4_CB_DOWN:
2967		seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
2968		break;
2969	case NFSD4_CB_FAULT:
2970		seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT;
2971		break;
2972	default:
2973		seq->status_flags = 0;
2974	}
2975	if (!list_empty(&clp->cl_revoked))
2976		seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
2977out_no_session:
2978	if (conn)
2979		free_conn(conn);
2980	spin_unlock(&nn->client_lock);
2981	return status;
2982out_put_session:
2983	nfsd4_put_session_locked(session);
2984	goto out_no_session;
2985}
2986
2987void
2988nfsd4_sequence_done(struct nfsd4_compoundres *resp)
2989{
2990	struct nfsd4_compound_state *cs = &resp->cstate;
2991
2992	if (nfsd4_has_session(cs)) {
2993		if (cs->status != nfserr_replay_cache) {
2994			nfsd4_store_cache_entry(resp);
2995			cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
2996		}
2997		/* Drop session reference that was taken in nfsd4_sequence() */
2998		nfsd4_put_session(cs->session);
2999	} else if (cs->clp)
3000		put_client_renew(cs->clp);
3001}
3002
3003__be32
3004nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc)
3005{
3006	struct nfs4_client *conf, *unconf;
3007	struct nfs4_client *clp = NULL;
3008	__be32 status = 0;
3009	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
3010
3011	spin_lock(&nn->client_lock);
3012	unconf = find_unconfirmed_client(&dc->clientid, true, nn);
3013	conf = find_confirmed_client(&dc->clientid, true, nn);
3014	WARN_ON_ONCE(conf && unconf);
3015
3016	if (conf) {
3017		if (client_has_state(conf)) {
3018			status = nfserr_clientid_busy;
3019			goto out;
3020		}
3021		status = mark_client_expired_locked(conf);
3022		if (status)
3023			goto out;
3024		clp = conf;
3025	} else if (unconf)
3026		clp = unconf;
3027	else {
3028		status = nfserr_stale_clientid;
3029		goto out;
3030	}
3031	if (!mach_creds_match(clp, rqstp)) {
3032		clp = NULL;
3033		status = nfserr_wrong_cred;
3034		goto out;
3035	}
3036	unhash_client_locked(clp);
3037out:
3038	spin_unlock(&nn->client_lock);
3039	if (clp)
3040		expire_client(clp);
3041	return status;
3042}
3043
3044__be32
3045nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
3046{
3047	__be32 status = 0;
3048
3049	if (rc->rca_one_fs) {
3050		if (!cstate->current_fh.fh_dentry)
3051			return nfserr_nofilehandle;
3052		/*
3053		 * We don't take advantage of the rca_one_fs case.
3054		 * That's OK, it's optional, we can safely ignore it.
3055		 */
3056		 return nfs_ok;
3057	}
3058
3059	status = nfserr_complete_already;
3060	if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
3061			     &cstate->session->se_client->cl_flags))
3062		goto out;
3063
3064	status = nfserr_stale_clientid;
3065	if (is_client_expired(cstate->session->se_client))
3066		/*
3067		 * The following error isn't really legal.
3068		 * But we only get here if the client just explicitly
3069		 * destroyed the client.  Surely it no longer cares what
3070		 * error it gets back on an operation for the dead
3071		 * client.
3072		 */
3073		goto out;
3074
3075	status = nfs_ok;
3076	nfsd4_client_record_create(cstate->session->se_client);
3077out:
3078	return status;
3079}
3080
3081__be32
3082nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3083		  struct nfsd4_setclientid *setclid)
3084{
3085	struct xdr_netobj 	clname = setclid->se_name;
3086	nfs4_verifier		clverifier = setclid->se_verf;
3087	struct nfs4_client	*conf, *new;
3088	struct nfs4_client	*unconf = NULL;
3089	__be32 			status;
3090	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
3091
3092	new = create_client(clname, rqstp, &clverifier);
3093	if (new == NULL)
3094		return nfserr_jukebox;
3095	/* Cases below refer to rfc 3530 section 14.2.33: */
3096	spin_lock(&nn->client_lock);
3097	conf = find_confirmed_client_by_name(&clname, nn);
3098	if (conf) {
3099		/* case 0: */
3100		status = nfserr_clid_inuse;
3101		if (clp_used_exchangeid(conf))
3102			goto out;
3103		if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
3104			char addr_str[INET6_ADDRSTRLEN];
3105			rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str,
3106				 sizeof(addr_str));
3107			dprintk("NFSD: setclientid: string in use by client "
3108				"at %s\n", addr_str);
3109			goto out;
3110		}
3111	}
3112	unconf = find_unconfirmed_client_by_name(&clname, nn);
3113	if (unconf)
3114		unhash_client_locked(unconf);
3115	if (conf && same_verf(&conf->cl_verifier, &clverifier))
3116		/* case 1: probable callback update */
3117		copy_clid(new, conf);
3118	else /* case 4 (new client) or cases 2, 3 (client reboot): */
3119		gen_clid(new, nn);
3120	new->cl_minorversion = 0;
3121	gen_callback(new, setclid, rqstp);
3122	add_to_unconfirmed(new);
3123	setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
3124	setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
3125	memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
3126	new = NULL;
3127	status = nfs_ok;
3128out:
3129	spin_unlock(&nn->client_lock);
3130	if (new)
3131		free_client(new);
3132	if (unconf)
3133		expire_client(unconf);
3134	return status;
3135}
3136
3137
3138__be32
3139nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
3140			 struct nfsd4_compound_state *cstate,
3141			 struct nfsd4_setclientid_confirm *setclientid_confirm)
3142{
3143	struct nfs4_client *conf, *unconf;
3144	struct nfs4_client *old = NULL;
3145	nfs4_verifier confirm = setclientid_confirm->sc_confirm;
3146	clientid_t * clid = &setclientid_confirm->sc_clientid;
3147	__be32 status;
3148	struct nfsd_net	*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
3149
3150	if (STALE_CLIENTID(clid, nn))
3151		return nfserr_stale_clientid;
3152
3153	spin_lock(&nn->client_lock);
3154	conf = find_confirmed_client(clid, false, nn);
3155	unconf = find_unconfirmed_client(clid, false, nn);
3156	/*
3157	 * We try hard to give out unique clientid's, so if we get an
3158	 * attempt to confirm the same clientid with a different cred,
3159	 * there's a bug somewhere.  Let's charitably assume it's our
3160	 * bug.
3161	 */
3162	status = nfserr_serverfault;
3163	if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred))
3164		goto out;
3165	if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred))
3166		goto out;
3167	/* cases below refer to rfc 3530 section 14.2.34: */
3168	if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) {
3169		if (conf && !unconf) /* case 2: probable retransmit */
3170			status = nfs_ok;
3171		else /* case 4: client hasn't noticed we rebooted yet? */
3172			status = nfserr_stale_clientid;
3173		goto out;
3174	}
3175	status = nfs_ok;
3176	if (conf) { /* case 1: callback update */
3177		old = unconf;
3178		unhash_client_locked(old);
3179		nfsd4_change_callback(conf, &unconf->cl_cb_conn);
3180	} else { /* case 3: normal case; new or rebooted client */
3181		old = find_confirmed_client_by_name(&unconf->cl_name, nn);
3182		if (old) {
3183			status = mark_client_expired_locked(old);
3184			if (status) {
3185				old = NULL;
3186				goto out;
3187			}
3188		}
3189		move_to_confirmed(unconf);
3190		conf = unconf;
3191	}
3192	get_client_locked(conf);
3193	spin_unlock(&nn->client_lock);
3194	nfsd4_probe_callback(conf);
3195	spin_lock(&nn->client_lock);
3196	put_client_renew_locked(conf);
3197out:
3198	spin_unlock(&nn->client_lock);
3199	if (old)
3200		expire_client(old);
3201	return status;
3202}
3203
3204static struct nfs4_file *nfsd4_alloc_file(void)
3205{
3206	return kmem_cache_alloc(file_slab, GFP_KERNEL);
3207}
3208
3209/* OPEN Share state helper functions */
3210static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
3211				struct nfs4_file *fp)
3212{
3213	lockdep_assert_held(&state_lock);
3214
3215	atomic_set(&fp->fi_ref, 1);
3216	spin_lock_init(&fp->fi_lock);
3217	INIT_LIST_HEAD(&fp->fi_stateids);
3218	INIT_LIST_HEAD(&fp->fi_delegations);
3219	INIT_LIST_HEAD(&fp->fi_clnt_odstate);
3220	fh_copy_shallow(&fp->fi_fhandle, fh);
3221	fp->fi_deleg_file = NULL;
3222	fp->fi_had_conflict = false;
3223	fp->fi_share_deny = 0;
3224	memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
3225	memset(fp->fi_access, 0, sizeof(fp->fi_access));
3226#ifdef CONFIG_NFSD_PNFS
3227	INIT_LIST_HEAD(&fp->fi_lo_states);
3228	atomic_set(&fp->fi_lo_recalls, 0);
3229#endif
3230	hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
3231}
3232
3233void
3234nfsd4_free_slabs(void)
3235{
3236	kmem_cache_destroy(odstate_slab);
3237	kmem_cache_destroy(openowner_slab);
3238	kmem_cache_destroy(lockowner_slab);
3239	kmem_cache_destroy(file_slab);
3240	kmem_cache_destroy(stateid_slab);
3241	kmem_cache_destroy(deleg_slab);
3242}
3243
3244int
3245nfsd4_init_slabs(void)
3246{
3247	openowner_slab = kmem_cache_create("nfsd4_openowners",
3248			sizeof(struct nfs4_openowner), 0, 0, NULL);
3249	if (openowner_slab == NULL)
3250		goto out;
3251	lockowner_slab = kmem_cache_create("nfsd4_lockowners",
3252			sizeof(struct nfs4_lockowner), 0, 0, NULL);
3253	if (lockowner_slab == NULL)
3254		goto out_free_openowner_slab;
3255	file_slab = kmem_cache_create("nfsd4_files",
3256			sizeof(struct nfs4_file), 0, 0, NULL);
3257	if (file_slab == NULL)
3258		goto out_free_lockowner_slab;
3259	stateid_slab = kmem_cache_create("nfsd4_stateids",
3260			sizeof(struct nfs4_ol_stateid), 0, 0, NULL);
3261	if (stateid_slab == NULL)
3262		goto out_free_file_slab;
3263	deleg_slab = kmem_cache_create("nfsd4_delegations",
3264			sizeof(struct nfs4_delegation), 0, 0, NULL);
3265	if (deleg_slab == NULL)
3266		goto out_free_stateid_slab;
3267	odstate_slab = kmem_cache_create("nfsd4_odstate",
3268			sizeof(struct nfs4_clnt_odstate), 0, 0, NULL);
3269	if (odstate_slab == NULL)
3270		goto out_free_deleg_slab;
3271	return 0;
3272
3273out_free_deleg_slab:
3274	kmem_cache_destroy(deleg_slab);
3275out_free_stateid_slab:
3276	kmem_cache_destroy(stateid_slab);
3277out_free_file_slab:
3278	kmem_cache_destroy(file_slab);
3279out_free_lockowner_slab:
3280	kmem_cache_destroy(lockowner_slab);
3281out_free_openowner_slab:
3282	kmem_cache_destroy(openowner_slab);
3283out:
3284	dprintk("nfsd4: out of memory while initializing nfsv4\n");
3285	return -ENOMEM;
3286}
3287
3288static void init_nfs4_replay(struct nfs4_replay *rp)
3289{
3290	rp->rp_status = nfserr_serverfault;
3291	rp->rp_buflen = 0;
3292	rp->rp_buf = rp->rp_ibuf;
3293	mutex_init(&rp->rp_mutex);
3294}
3295
3296static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
3297		struct nfs4_stateowner *so)
3298{
3299	if (!nfsd4_has_session(cstate)) {
3300		mutex_lock(&so->so_replay.rp_mutex);
3301		cstate->replay_owner = nfs4_get_stateowner(so);
3302	}
3303}
3304
3305void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
3306{
3307	struct nfs4_stateowner *so = cstate->replay_owner;
3308
3309	if (so != NULL) {
3310		cstate->replay_owner = NULL;
3311		mutex_unlock(&so->so_replay.rp_mutex);
3312		nfs4_put_stateowner(so);
3313	}
3314}
3315
3316static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp)
3317{
3318	struct nfs4_stateowner *sop;
3319
3320	sop = kmem_cache_alloc(slab, GFP_KERNEL);
3321	if (!sop)
3322		return NULL;
3323
3324	sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL);
3325	if (!sop->so_owner.data) {
3326		kmem_cache_free(slab, sop);
3327		return NULL;
3328	}
3329	sop->so_owner.len = owner->len;
3330
3331	INIT_LIST_HEAD(&sop->so_stateids);
3332	sop->so_client = clp;
3333	init_nfs4_replay(&sop->so_replay);
3334	atomic_set(&sop->so_count, 1);
3335	return sop;
3336}
3337
3338static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
3339{
3340	lockdep_assert_held(&clp->cl_lock);
3341
3342	list_add(&oo->oo_owner.so_strhash,
3343		 &clp->cl_ownerstr_hashtbl[strhashval]);
3344	list_add(&oo->oo_perclient, &clp->cl_openowners);
3345}
3346
3347static void nfs4_unhash_openowner(struct nfs4_stateowner *so)
3348{
3349	unhash_openowner_locked(openowner(so));
3350}
3351
3352static void nfs4_free_openowner(struct nfs4_stateowner *so)
3353{
3354	struct nfs4_openowner *oo = openowner(so);
3355
3356	kmem_cache_free(openowner_slab, oo);
3357}
3358
3359static const struct nfs4_stateowner_operations openowner_ops = {
3360	.so_unhash =	nfs4_unhash_openowner,
3361	.so_free =	nfs4_free_openowner,
3362};
3363
3364static struct nfs4_openowner *
3365alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
3366			   struct nfsd4_compound_state *cstate)
3367{
3368	struct nfs4_client *clp = cstate->clp;
3369	struct nfs4_openowner *oo, *ret;
3370
3371	oo = alloc_stateowner(openowner_slab, &open->op_owner, clp);
3372	if (!oo)
3373		return NULL;
3374	oo->oo_owner.so_ops = &openowner_ops;
3375	oo->oo_owner.so_is_open_owner = 1;
3376	oo->oo_owner.so_seqid = open->op_seqid;
3377	oo->oo_flags = 0;
3378	if (nfsd4_has_session(cstate))
3379		oo->oo_flags |= NFS4_OO_CONFIRMED;
3380	oo->oo_time = 0;
3381	oo->oo_last_closed_stid = NULL;
3382	INIT_LIST_HEAD(&oo->oo_close_lru);
3383	spin_lock(&clp->cl_lock);
3384	ret = find_openstateowner_str_locked(strhashval, open, clp);
3385	if (ret == NULL) {
3386		hash_openowner(oo, clp, strhashval);
3387		ret = oo;
3388	} else
3389		nfs4_free_openowner(&oo->oo_owner);
3390	spin_unlock(&clp->cl_lock);
3391	return ret;
3392}
3393
3394static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
3395	struct nfs4_openowner *oo = open->op_openowner;
3396
3397	atomic_inc(&stp->st_stid.sc_count);
3398	stp->st_stid.sc_type = NFS4_OPEN_STID;
3399	INIT_LIST_HEAD(&stp->st_locks);
3400	stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner);
3401	get_nfs4_file(fp);
3402	stp->st_stid.sc_file = fp;
3403	stp->st_access_bmap = 0;
3404	stp->st_deny_bmap = 0;
3405	stp->st_openstp = NULL;
3406	init_rwsem(&stp->st_rwsem);
3407	spin_lock(&oo->oo_owner.so_client->cl_lock);
3408	list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
3409	spin_lock(&fp->fi_lock);
3410	list_add(&stp->st_perfile, &fp->fi_stateids);
3411	spin_unlock(&fp->fi_lock);
3412	spin_unlock(&oo->oo_owner.so_client->cl_lock);
3413}
3414
3415/*
3416 * In the 4.0 case we need to keep the owners around a little while to handle
3417 * CLOSE replay. We still do need to release any file access that is held by
3418 * them before returning however.
3419 */
3420static void
3421move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
3422{
3423	struct nfs4_ol_stateid *last;
3424	struct nfs4_openowner *oo = openowner(s->st_stateowner);
3425	struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net,
3426						nfsd_net_id);
3427
3428	dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);
3429
3430	/*
3431	 * We know that we hold one reference via nfsd4_close, and another
3432	 * "persistent" reference for the client. If the refcount is higher
3433	 * than 2, then there are still calls in progress that are using this
3434	 * stateid. We can't put the sc_file reference until they are finished.
3435	 * Wait for the refcount to drop to 2. Since it has been unhashed,
3436	 * there should be no danger of the refcount going back up again at
3437	 * this point.
3438	 */
3439	wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2);
3440
3441	release_all_access(s);
3442	if (s->st_stid.sc_file) {
3443		put_nfs4_file(s->st_stid.sc_file);
3444		s->st_stid.sc_file = NULL;
3445	}
3446
3447	spin_lock(&nn->client_lock);
3448	last = oo->oo_last_closed_stid;
3449	oo->oo_last_closed_stid = s;
3450	list_move_tail(&oo->oo_close_lru, &nn->close_lru);
3451	oo->oo_time = get_seconds();
3452	spin_unlock(&nn->client_lock);
3453	if (last)
3454		nfs4_put_stid(&last->st_stid);
3455}
3456
3457/* search file_hashtbl[] for file */
3458static struct nfs4_file *
3459find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
3460{
3461	struct nfs4_file *fp;
3462
3463	hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
3464		if (fh_match(&fp->fi_fhandle, fh)) {
3465			if (atomic_inc_not_zero(&fp->fi_ref))
3466				return fp;
3467		}
3468	}
3469	return NULL;
3470}
3471
3472struct nfs4_file *
3473find_file(struct knfsd_fh *fh)
3474{
3475	struct nfs4_file *fp;
3476	unsigned int hashval = file_hashval(fh);
3477
3478	rcu_read_lock();
3479	fp = find_file_locked(fh, hashval);
3480	rcu_read_unlock();
3481	return fp;
3482}
3483
3484static struct nfs4_file *
3485find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
3486{
3487	struct nfs4_file *fp;
3488	unsigned int hashval = file_hashval(fh);
3489
3490	rcu_read_lock();
3491	fp = find_file_locked(fh, hashval);
3492	rcu_read_unlock();
3493	if (fp)
3494		return fp;
3495
3496	spin_lock(&state_lock);
3497	fp = find_file_locked(fh, hashval);
3498	if (likely(fp == NULL)) {
3499		nfsd4_init_file(fh, hashval, new);
3500		fp = new;
3501	}
3502	spin_unlock(&state_lock);
3503
3504	return fp;
3505}
3506
3507/*
3508 * Called to check deny when READ with all zero stateid or
3509 * WRITE with all zero or all one stateid
3510 */
3511static __be32
3512nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
3513{
3514	struct nfs4_file *fp;
3515	__be32 ret = nfs_ok;
3516
3517	fp = find_file(&current_fh->fh_handle);
3518	if (!fp)
3519		return ret;
3520	/* Check for conflicting share reservations */
3521	spin_lock(&fp->fi_lock);
3522	if (fp->fi_share_deny & deny_type)
3523		ret = nfserr_locked;
3524	spin_unlock(&fp->fi_lock);
3525	put_nfs4_file(fp);
3526	return ret;
3527}
3528
3529static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
3530{
3531	struct nfs4_delegation *dp = cb_to_delegation(cb);
3532	struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
3533					  nfsd_net_id);
3534
3535	block_delegations(&dp->dl_stid.sc_file->fi_fhandle);
3536
3537	/*
3538	 * We can't do this in nfsd_break_deleg_cb because it is
3539	 * already holding inode->i_lock.
3540	 *
3541	 * If the dl_time != 0, then we know that it has already been
3542	 * queued for a lease break. Don't queue it again.
3543	 */
3544	spin_lock(&state_lock);
3545	if (dp->dl_time == 0) {
3546		dp->dl_time = get_seconds();
3547		list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
3548	}
3549	spin_unlock(&state_lock);
3550}
3551
3552static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
3553		struct rpc_task *task)
3554{
3555	struct nfs4_delegation *dp = cb_to_delegation(cb);
3556
3557	switch (task->tk_status) {
3558	case 0:
3559		return 1;
3560	case -EBADHANDLE:
3561	case -NFS4ERR_BAD_STATEID:
3562		/*
3563		 * Race: client probably got cb_recall before open reply
3564		 * granting delegation.
3565		 */
3566		if (dp->dl_retries--) {
3567			rpc_delay(task, 2 * HZ);
3568			return 0;
3569		}
3570		/*FALLTHRU*/
3571	default:
3572		return -1;
3573	}
3574}
3575
3576static void nfsd4_cb_recall_release(struct nfsd4_callback *cb)
3577{
3578	struct nfs4_delegation *dp = cb_to_delegation(cb);
3579
3580	nfs4_put_stid(&dp->dl_stid);
3581}
3582
3583static struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
3584	.prepare	= nfsd4_cb_recall_prepare,
3585	.done		= nfsd4_cb_recall_done,
3586	.release	= nfsd4_cb_recall_release,
3587};
3588
3589static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
3590{
3591	/*
3592	 * We're assuming the state code never drops its reference
3593	 * without first removing the lease.  Since we're in this lease
3594	 * callback (and since the lease code is serialized by the kernel
3595	 * lock) we know the server hasn't removed the lease yet, we know
3596	 * it's safe to take a reference.
3597	 */
3598	atomic_inc(&dp->dl_stid.sc_count);
3599	nfsd4_run_cb(&dp->dl_recall);
3600}
3601
3602/* Called from break_lease() with i_lock held. */
3603static bool
3604nfsd_break_deleg_cb(struct file_lock *fl)
3605{
3606	bool ret = false;
3607	struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
3608	struct nfs4_delegation *dp;
3609
3610	if (!fp) {
3611		WARN(1, "(%p)->fl_owner NULL\n", fl);
3612		return ret;
3613	}
3614	if (fp->fi_had_conflict) {
3615		WARN(1, "duplicate break on %p\n", fp);
3616		return ret;
3617	}
3618	/*
3619	 * We don't want the locks code to timeout the lease for us;
3620	 * we'll remove it ourself if a delegation isn't returned
3621	 * in time:
3622	 */
3623	fl->fl_break_time = 0;
3624
3625	spin_lock(&fp->fi_lock);
3626	fp->fi_had_conflict = true;
3627	/*
3628	 * If there are no delegations on the list, then return true
3629	 * so that the lease code will go ahead and delete it.
3630	 */
3631	if (list_empty(&fp->fi_delegations))
3632		ret = true;
3633	else
3634		list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
3635			nfsd_break_one_deleg(dp);
3636	spin_unlock(&fp->fi_lock);
3637	return ret;
3638}
3639
3640static int
3641nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
3642		     struct list_head *dispose)
3643{
3644	if (arg & F_UNLCK)
3645		return lease_modify(onlist, arg, dispose);
3646	else
3647		return -EAGAIN;
3648}
3649
3650static const struct lock_manager_operations nfsd_lease_mng_ops = {
3651	.lm_break = nfsd_break_deleg_cb,
3652	.lm_change = nfsd_change_deleg_cb,
3653};
3654
3655static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so, u32 seqid)
3656{
3657	if (nfsd4_has_session(cstate))
3658		return nfs_ok;
3659	if (seqid == so->so_seqid - 1)
3660		return nfserr_replay_me;
3661	if (seqid == so->so_seqid)
3662		return nfs_ok;
3663	return nfserr_bad_seqid;
3664}
3665
3666static __be32 lookup_clientid(clientid_t *clid,
3667		struct nfsd4_compound_state *cstate,
3668		struct nfsd_net *nn)
3669{
3670	struct nfs4_client *found;
3671
3672	if (cstate->clp) {
3673		found = cstate->clp;
3674		if (!same_clid(&found->cl_clientid, clid))
3675			return nfserr_stale_clientid;
3676		return nfs_ok;
3677	}
3678
3679	if (STALE_CLIENTID(clid, nn))
3680		return nfserr_stale_clientid;
3681
3682	/*
3683	 * For v4.1+ we get the client in the SEQUENCE op. If we don't have one
3684	 * cached already then we know this is for is for v4.0 and "sessions"
3685	 * will be false.
3686	 */
3687	WARN_ON_ONCE(cstate->session);
3688	spin_lock(&nn->client_lock);
3689	found = find_confirmed_client(clid, false, nn);
3690	if (!found) {
3691		spin_unlock(&nn->client_lock);
3692		return nfserr_expired;
3693	}
3694	atomic_inc(&found->cl_refcount);
3695	spin_unlock(&nn->client_lock);
3696
3697	/* Cache the nfs4_client in cstate! */
3698	cstate->clp = found;
3699	return nfs_ok;
3700}
3701
3702__be32
3703nfsd4_process_open1(struct nfsd4_compound_state *cstate,
3704		    struct nfsd4_open *open, struct nfsd_net *nn)
3705{
3706	clientid_t *clientid = &open->op_clientid;
3707	struct nfs4_client *clp = NULL;
3708	unsigned int strhashval;
3709	struct nfs4_openowner *oo = NULL;
3710	__be32 status;
3711
3712	if (STALE_CLIENTID(&open->op_clientid, nn))
3713		return nfserr_stale_clientid;
3714	/*
3715	 * In case we need it later, after we've already created the
3716	 * file and don't want to risk a further failure:
3717	 */
3718	open->op_file = nfsd4_alloc_file();
3719	if (open->op_file == NULL)
3720		return nfserr_jukebox;
3721
3722	status = lookup_clientid(clientid, cstate, nn);
3723	if (status)
3724		return status;
3725	clp = cstate->clp;
3726
3727	strhashval = ownerstr_hashval(&open->op_owner);
3728	oo = find_openstateowner_str(strhashval, open, clp);
3729	open->op_openowner = oo;
3730	if (!oo) {
3731		goto new_owner;
3732	}
3733	if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
3734		/* Replace unconfirmed owners without checking for replay. */
3735		release_openowner(oo);
3736		open->op_openowner = NULL;
3737		goto new_owner;
3738	}
3739	status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
3740	if (status)
3741		return status;
3742	goto alloc_stateid;
3743new_owner:
3744	oo = alloc_init_open_stateowner(strhashval, open, cstate);
3745	if (oo == NULL)
3746		return nfserr_jukebox;
3747	open->op_openowner = oo;
3748alloc_stateid:
3749	open->op_stp = nfs4_alloc_open_stateid(clp);
3750	if (!open->op_stp)
3751		return nfserr_jukebox;
3752
3753	if (nfsd4_has_session(cstate) &&
3754	    (cstate->current_fh.fh_export->ex_flags & NFSEXP_PNFS)) {
3755		open->op_odstate = alloc_clnt_odstate(clp);
3756		if (!open->op_odstate)
3757			return nfserr_jukebox;
3758	}
3759
3760	return nfs_ok;
3761}
3762
3763static inline __be32
3764nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
3765{
3766	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
3767		return nfserr_openmode;
3768	else
3769		return nfs_ok;
3770}
3771
3772static int share_access_to_flags(u32 share_access)
3773{
3774	return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
3775}
3776
3777static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s)
3778{
3779	struct nfs4_stid *ret;
3780
3781	ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID);
3782	if (!ret)
3783		return NULL;
3784	return delegstateid(ret);
3785}
3786
3787static bool nfsd4_is_deleg_cur(struct nfsd4_open *open)
3788{
3789	return open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR ||
3790	       open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH;
3791}
3792
3793static __be32
3794nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
3795		struct nfs4_delegation **dp)
3796{
3797	int flags;
3798	__be32 status = nfserr_bad_stateid;
3799	struct nfs4_delegation *deleg;
3800
3801	deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
3802	if (deleg == NULL)
3803		goto out;
3804	flags = share_access_to_flags(open->op_share_access);
3805	status = nfs4_check_delegmode(deleg, flags);
3806	if (status) {
3807		nfs4_put_stid(&deleg->dl_stid);
3808		goto out;
3809	}
3810	*dp = deleg;
3811out:
3812	if (!nfsd4_is_deleg_cur(open))
3813		return nfs_ok;
3814	if (status)
3815		return status;
3816	open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
3817	return nfs_ok;
3818}
3819
3820static struct nfs4_ol_stateid *
3821nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
3822{
3823	struct nfs4_ol_stateid *local, *ret = NULL;
3824	struct nfs4_openowner *oo = open->op_openowner;
3825
3826	spin_lock(&fp->fi_lock);
3827	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
3828		/* ignore lock owners */
3829		if (local->st_stateowner->so_is_open_owner == 0)
3830			continue;
3831		if (local->st_stateowner == &oo->oo_owner) {
3832			ret = local;
3833			atomic_inc(&ret->st_stid.sc_count);
3834			break;
3835		}
3836	}
3837	spin_unlock(&fp->fi_lock);
3838	return ret;
3839}
3840
3841static inline int nfs4_access_to_access(u32 nfs4_access)
3842{
3843	int flags = 0;
3844
3845	if (nfs4_access & NFS4_SHARE_ACCESS_READ)
3846		flags |= NFSD_MAY_READ;
3847	if (nfs4_access & NFS4_SHARE_ACCESS_WRITE)
3848		flags |= NFSD_MAY_WRITE;
3849	return flags;
3850}
3851
3852static inline __be32
3853nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
3854		struct nfsd4_open *open)
3855{
3856	struct iattr iattr = {
3857		.ia_valid = ATTR_SIZE,
3858		.ia_size = 0,
3859	};
3860	if (!open->op_truncate)
3861		return 0;
3862	if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
3863		return nfserr_inval;
3864	return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
3865}
3866
3867static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
3868		struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
3869		struct nfsd4_open *open)
3870{
3871	struct file *filp = NULL;
3872	__be32 status;
3873	int oflag = nfs4_access_to_omode(open->op_share_access);
3874	int access = nfs4_access_to_access(open->op_share_access);
3875	unsigned char old_access_bmap, old_deny_bmap;
3876
3877	spin_lock(&fp->fi_lock);
3878
3879	/*
3880	 * Are we trying to set a deny mode that would conflict with
3881	 * current access?
3882	 */
3883	status = nfs4_file_check_deny(fp, open->op_share_deny);
3884	if (status != nfs_ok) {
3885		spin_unlock(&fp->fi_lock);
3886		goto out;
3887	}
3888
3889	/* set access to the file */
3890	status = nfs4_file_get_access(fp, open->op_share_access);
3891	if (status != nfs_ok) {
3892		spin_unlock(&fp->fi_lock);
3893		goto out;
3894	}
3895
3896	/* Set access bits in stateid */
3897	old_access_bmap = stp->st_access_bmap;
3898	set_access(open->op_share_access, stp);
3899
3900	/* Set new deny mask */
3901	old_deny_bmap = stp->st_deny_bmap;
3902	set_deny(open->op_share_deny, stp);
3903	fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
3904
3905	if (!fp->fi_fds[oflag]) {
3906		spin_unlock(&fp->fi_lock);
3907		status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
3908		if (status)
3909			goto out_put_access;
3910		spin_lock(&fp->fi_lock);
3911		if (!fp->fi_fds[oflag]) {
3912			fp->fi_fds[oflag] = filp;
3913			filp = NULL;
3914		}
3915	}
3916	spin_unlock(&fp->fi_lock);
3917	if (filp)
3918		fput(filp);
3919
3920	status = nfsd4_truncate(rqstp, cur_fh, open);
3921	if (status)
3922		goto out_put_access;
3923out:
3924	return status;
3925out_put_access:
3926	stp->st_access_bmap = old_access_bmap;
3927	nfs4_file_put_access(fp, open->op_share_access);
3928	reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp);
3929	goto out;
3930}
3931
3932static __be32
3933nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
3934{
3935	__be32 status;
3936	unsigned char old_deny_bmap;
3937
3938	if (!test_access(open->op_share_access, stp))
3939		return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
3940
3941	/* test and set deny mode */
3942	spin_lock(&fp->fi_lock);
3943	status = nfs4_file_check_deny(fp, open->op_share_deny);
3944	if (status == nfs_ok) {
3945		old_deny_bmap = stp->st_deny_bmap;
3946		set_deny(open->op_share_deny, stp);
3947		fp->fi_share_deny |=
3948				(open->op_share_deny & NFS4_SHARE_DENY_BOTH);
3949	}
3950	spin_unlock(&fp->fi_lock);
3951
3952	if (status != nfs_ok)
3953		return status;
3954
3955	status = nfsd4_truncate(rqstp, cur_fh, open);
3956	if (status != nfs_ok)
3957		reset_union_bmap_deny(old_deny_bmap, stp);
3958	return status;
3959}
3960
3961static void
3962nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session)
3963{
3964	open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
3965}
3966
3967/* Should we give out recallable state?: */
3968static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
3969{
3970	if (clp->cl_cb_state == NFSD4_CB_UP)
3971		return true;
3972	/*
3973	 * In the sessions case, since we don't have to establish a
3974	 * separate connection for callbacks, we assume it's OK
3975	 * until we hear otherwise:
3976	 */
3977	return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
3978}
3979
3980static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
3981{
3982	struct file_lock *fl;
3983
3984	fl = locks_alloc_lock();
3985	if (!fl)
3986		return NULL;
3987	fl->fl_lmops = &nfsd_lease_mng_ops;
3988	fl->fl_flags = FL_DELEG;
3989	fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
3990	fl->fl_end = OFFSET_MAX;
3991	fl->fl_owner = (fl_owner_t)fp;
3992	fl->fl_pid = current->tgid;
3993	return fl;
3994}
3995
3996/**
3997 * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer
3998 * @dp:   a pointer to the nfs4_delegation we're adding.
3999 *
4000 * Return:
4001 *      On success: Return code will be 0 on success.
4002 *
4003 *      On error: -EAGAIN if there was an existing delegation.
4004 *                 nonzero if there is an error in other cases.
4005 *
4006 */
4007
4008static int nfs4_setlease(struct nfs4_delegation *dp)
4009{
4010	struct nfs4_file *fp = dp->dl_stid.sc_file;
4011	struct file_lock *fl, *ret;
4012	struct file *filp;
4013	int status = 0;
4014
4015	fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ);
4016	if (!fl)
4017		return -ENOMEM;
4018	filp = find_readable_file(fp);
4019	if (!filp) {
4020		/* We should always have a readable file here */
4021		WARN_ON_ONCE(1);
4022		return -EBADF;
4023	}
4024	fl->fl_file = filp;
4025	ret = fl;
4026	status = vfs_setlease(filp, fl->fl_type, &fl, NULL);
4027	if (fl)
4028		locks_free_lock(fl);
4029	if (status)
4030		goto out_fput;
4031	spin_lock(&state_lock);
4032	spin_lock(&fp->fi_lock);
4033	/* Did the lease get broken before we took the lock? */
4034	status = -EAGAIN;
4035	if (fp->fi_had_conflict)
4036		goto out_unlock;
4037	/* Race breaker */
4038	if (fp->fi_deleg_file) {
4039		status = hash_delegation_locked(dp, fp);
4040		goto out_unlock;
4041	}
4042	fp->fi_deleg_file = filp;
4043	fp->fi_delegees = 0;
4044	status = hash_delegation_locked(dp, fp);
4045	spin_unlock(&fp->fi_lock);
4046	spin_unlock(&state_lock);
4047	if (status) {
4048		/* Should never happen, this is a new fi_deleg_file  */
4049		WARN_ON_ONCE(1);
4050		goto out_fput;
4051	}
4052	return 0;
4053out_unlock:
4054	spin_unlock(&fp->fi_lock);
4055	spin_unlock(&state_lock);
4056out_fput:
4057	fput(filp);
4058	return status;
4059}
4060
4061static struct nfs4_delegation *
4062nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
4063		    struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
4064{
4065	int status;
4066	struct nfs4_delegation *dp;
4067
4068	if (fp->fi_had_conflict)
4069		return ERR_PTR(-EAGAIN);
4070
4071	spin_lock(&state_lock);
4072	spin_lock(&fp->fi_lock);
4073	status = nfs4_get_existing_delegation(clp, fp);
4074	spin_unlock(&fp->fi_lock);
4075	spin_unlock(&state_lock);
4076
4077	if (status)
4078		return ERR_PTR(status);
4079
4080	dp = alloc_init_deleg(clp, fh, odstate);
4081	if (!dp)
4082		return ERR_PTR(-ENOMEM);
4083
4084	get_nfs4_file(fp);
4085	spin_lock(&state_lock);
4086	spin_lock(&fp->fi_lock);
4087	dp->dl_stid.sc_file = fp;
4088	if (!fp->fi_deleg_file) {
4089		spin_unlock(&fp->fi_lock);
4090		spin_unlock(&state_lock);
4091		status = nfs4_setlease(dp);
4092		goto out;
4093	}
4094	if (fp->fi_had_conflict) {
4095		status = -EAGAIN;
4096		goto out_unlock;
4097	}
4098	status = hash_delegation_locked(dp, fp);
4099out_unlock:
4100	spin_unlock(&fp->fi_lock);
4101	spin_unlock(&state_lock);
4102out:
4103	if (status) {
4104		put_clnt_odstate(dp->dl_clnt_odstate);
4105		nfs4_put_stid(&dp->dl_stid);
4106		return ERR_PTR(status);
4107	}
4108	return dp;
4109}
4110
4111static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
4112{
4113	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
4114	if (status == -EAGAIN)
4115		open->op_why_no_deleg = WND4_CONTENTION;
4116	else {
4117		open->op_why_no_deleg = WND4_RESOURCE;
4118		switch (open->op_deleg_want) {
4119		case NFS4_SHARE_WANT_READ_DELEG:
4120		case NFS4_SHARE_WANT_WRITE_DELEG:
4121		case NFS4_SHARE_WANT_ANY_DELEG:
4122			break;
4123		case NFS4_SHARE_WANT_CANCEL:
4124			open->op_why_no_deleg = WND4_CANCELLED;
4125			break;
4126		case NFS4_SHARE_WANT_NO_DELEG:
4127			WARN_ON_ONCE(1);
4128		}
4129	}
4130}
4131
4132/*
4133 * Attempt to hand out a delegation.
4134 *
4135 * Note we don't support write delegations, and won't until the vfs has
4136 * proper support for them.
4137 */
4138static void
4139nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
4140			struct nfs4_ol_stateid *stp)
4141{
4142	struct nfs4_delegation *dp;
4143	struct nfs4_openowner *oo = openowner(stp->st_stateowner);
4144	struct nfs4_client *clp = stp->st_stid.sc_client;
4145	int cb_up;
4146	int status = 0;
4147
4148	cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
4149	open->op_recall = 0;
4150	switch (open->op_claim_type) {
4151		case NFS4_OPEN_CLAIM_PREVIOUS:
4152			if (!cb_up)
4153				open->op_recall = 1;
4154			if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ)
4155				goto out_no_deleg;
4156			break;
4157		case NFS4_OPEN_CLAIM_NULL:
4158		case NFS4_OPEN_CLAIM_FH:
4159			/*
4160			 * Let's not give out any delegations till everyone's
4161			 * had the chance to reclaim theirs....
4162			 */
4163			if (locks_in_grace(clp->net))
4164				goto out_no_deleg;
4165			if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
4166				goto out_no_deleg;
4167			/*
4168			 * Also, if the file was opened for write or
4169			 * create, there's a good chance the client's
4170			 * about to write to it, resulting in an
4171			 * immediate recall (since we don't support
4172			 * write delegations):
4173			 */
4174			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
4175				goto out_no_deleg;
4176			if (open->op_create == NFS4_OPEN_CREATE)
4177				goto out_no_deleg;
4178			break;
4179		default:
4180			goto out_no_deleg;
4181	}
4182	dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate);
4183	if (IS_ERR(dp))
4184		goto out_no_deleg;
4185
4186	memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
4187
4188	dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
4189		STATEID_VAL(&dp->dl_stid.sc_stateid));
4190	open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
4191	nfs4_put_stid(&dp->dl_stid);
4192	return;
4193out_no_deleg:
4194	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
4195	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
4196	    open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) {
4197		dprintk("NFSD: WARNING: refusing delegation reclaim\n");
4198		open->op_recall = 1;
4199	}
4200
4201	/* 4.1 client asking for a delegation? */
4202	if (open->op_deleg_want)
4203		nfsd4_open_deleg_none_ext(open, status);
4204	return;
4205}
4206
4207static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
4208					struct nfs4_delegation *dp)
4209{
4210	if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG &&
4211	    dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
4212		open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
4213		open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
4214	} else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG &&
4215		   dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
4216		open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
4217		open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
4218	}
4219	/* Otherwise the client must be confused wanting a delegation
4220	 * it already has, therefore we don't return
4221	 * NFS4_OPEN_DELEGATE_NONE_EXT and reason.
4222	 */
4223}
4224
4225__be32
4226nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
4227{
4228	struct nfsd4_compoundres *resp = rqstp->rq_resp;
4229	struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
4230	struct nfs4_file *fp = NULL;
4231	struct nfs4_ol_stateid *stp = NULL;
4232	struct nfs4_delegation *dp = NULL;
4233	__be32 status;
4234
4235	/*
4236	 * Lookup file; if found, lookup stateid and check open request,
4237	 * and check for delegations in the process of being recalled.
4238	 * If not found, create the nfs4_file struct
4239	 */
4240	fp = find_or_add_file(open->op_file, &current_fh->fh_handle);
4241	if (fp != open->op_file) {
4242		status = nfs4_check_deleg(cl, open, &dp);
4243		if (status)
4244			goto out;
4245		stp = nfsd4_find_existing_open(fp, open);
4246	} else {
4247		open->op_file = NULL;
4248		status = nfserr_bad_stateid;
4249		if (nfsd4_is_deleg_cur(open))
4250			goto out;
4251	}
4252
4253	/*
4254	 * OPEN the file, or upgrade an existing OPEN.
4255	 * If truncate fails, the OPEN fails.
4256	 */
4257	if (stp) {
4258		/* Stateid was found, this is an OPEN upgrade */
4259		down_read(&stp->st_rwsem);
4260		status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
4261		if (status) {
4262			up_read(&stp->st_rwsem);
4263			goto out;
4264		}
4265	} else {
4266		stp = open->op_stp;
4267		open->op_stp = NULL;
4268		init_open_stateid(stp, fp, open);
4269		down_read(&stp->st_rwsem);
4270		status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
4271		if (status) {
4272			up_read(&stp->st_rwsem);
4273			release_open_stateid(stp);
4274			goto out;
4275		}
4276
4277		stp->st_clnt_odstate = find_or_hash_clnt_odstate(fp,
4278							open->op_odstate);
4279		if (stp->st_clnt_odstate == open->op_odstate)
4280			open->op_odstate = NULL;
4281	}
4282	update_stateid(&stp->st_stid.sc_stateid);
4283	memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4284	up_read(&stp->st_rwsem);
4285
4286	if (nfsd4_has_session(&resp->cstate)) {
4287		if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
4288			open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
4289			open->op_why_no_deleg = WND4_NOT_WANTED;
4290			goto nodeleg;
4291		}
4292	}
4293
4294	/*
4295	* Attempt to hand out a delegation. No error return, because the
4296	* OPEN succeeds even if we fail.
4297	*/
4298	nfs4_open_delegation(current_fh, open, stp);
4299nodeleg:
4300	status = nfs_ok;
4301
4302	dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
4303		STATEID_VAL(&stp->st_stid.sc_stateid));
4304out:
4305	/* 4.1 client trying to upgrade/downgrade delegation? */
4306	if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
4307	    open->op_deleg_want)
4308		nfsd4_deleg_xgrade_none_ext(open, dp);
4309
4310	if (fp)
4311		put_nfs4_file(fp);
4312	if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
4313		nfs4_set_claim_prev(open, nfsd4_has_session(&resp->cstate));
4314	/*
4315	* To finish the open response, we just need to set the rflags.
4316	*/
4317	open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
4318	if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) &&
4319	    !nfsd4_has_session(&resp->cstate))
4320		open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
4321	if (dp)
4322		nfs4_put_stid(&dp->dl_stid);
4323	if (stp)
4324		nfs4_put_stid(&stp->st_stid);
4325
4326	return status;
4327}
4328
4329void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
4330			      struct nfsd4_open *open)
4331{
4332	if (open->op_openowner) {
4333		struct nfs4_stateowner *so = &open->op_openowner->oo_owner;
4334
4335		nfsd4_cstate_assign_replay(cstate, so);
4336		nfs4_put_stateowner(so);
4337	}
4338	if (open->op_file)
4339		kmem_cache_free(file_slab, open->op_file);
4340	if (open->op_stp)
4341		nfs4_put_stid(&open->op_stp->st_stid);
4342	if (open->op_odstate)
4343		kmem_cache_free(odstate_slab, open->op_odstate);
4344}
4345
4346__be32
4347nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4348	    clientid_t *clid)
4349{
4350	struct nfs4_client *clp;
4351	__be32 status;
4352	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
4353
4354	dprintk("process_renew(%08x/%08x): starting\n",
4355			clid->cl_boot, clid->cl_id);
4356	status = lookup_clientid(clid, cstate, nn);
4357	if (status)
4358		goto out;
4359	clp = cstate->clp;
4360	status = nfserr_cb_path_down;
4361	if (!list_empty(&clp->cl_delegations)
4362			&& clp->cl_cb_state != NFSD4_CB_UP)
4363		goto out;
4364	status = nfs_ok;
4365out:
4366	return status;
4367}
4368
4369void
4370nfsd4_end_grace(struct nfsd_net *nn)
4371{
4372	/* do nothing if grace period already ended */
4373	if (nn->grace_ended)
4374		return;
4375
4376	dprintk("NFSD: end of grace period\n");
4377	nn->grace_ended = true;
4378	/*
4379	 * If the server goes down again right now, an NFSv4
4380	 * client will still be allowed to reclaim after it comes back up,
4381	 * even if it hasn't yet had a chance to reclaim state this time.
4382	 *
4383	 */
4384	nfsd4_record_grace_done(nn);
4385	/*
4386	 * At this point, NFSv4 clients can still reclaim.  But if the
4387	 * server crashes, any that have not yet reclaimed will be out
4388	 * of luck on the next boot.
4389	 *
4390	 * (NFSv4.1+ clients are considered to have reclaimed once they
4391	 * call RECLAIM_COMPLETE.  NFSv4.0 clients are considered to
4392	 * have reclaimed after their first OPEN.)
4393	 */
4394	locks_end_grace(&nn->nfsd4_manager);
4395	/*
4396	 * At this point, and once lockd and/or any other containers
4397	 * exit their grace period, further reclaims will fail and
4398	 * regular locking can resume.
4399	 */
4400}
4401
4402static time_t
4403nfs4_laundromat(struct nfsd_net *nn)
4404{
4405	struct nfs4_client *clp;
4406	struct nfs4_openowner *oo;
4407	struct nfs4_delegation *dp;
4408	struct nfs4_ol_stateid *stp;
4409	struct list_head *pos, *next, reaplist;
4410	time_t cutoff = get_seconds() - nn->nfsd4_lease;
4411	time_t t, new_timeo = nn->nfsd4_lease;
4412
4413	dprintk("NFSD: laundromat service - starting\n");
4414	nfsd4_end_grace(nn);
4415	INIT_LIST_HEAD(&reaplist);
4416	spin_lock(&nn->client_lock);
4417	list_for_each_safe(pos, next, &nn->client_lru) {
4418		clp = list_entry(pos, struct nfs4_client, cl_lru);
4419		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
4420			t = clp->cl_time - cutoff;
4421			new_timeo = min(new_timeo, t);
4422			break;
4423		}
4424		if (mark_client_expired_locked(clp)) {
4425			dprintk("NFSD: client in use (clientid %08x)\n",
4426				clp->cl_clientid.cl_id);
4427			continue;
4428		}
4429		list_add(&clp->cl_lru, &reaplist);
4430	}
4431	spin_unlock(&nn->client_lock);
4432	list_for_each_safe(pos, next, &reaplist) {
4433		clp = list_entry(pos, struct nfs4_client, cl_lru);
4434		dprintk("NFSD: purging unused client (clientid %08x)\n",
4435			clp->cl_clientid.cl_id);
4436		list_del_init(&clp->cl_lru);
4437		expire_client(clp);
4438	}
4439	spin_lock(&state_lock);
4440	list_for_each_safe(pos, next, &nn->del_recall_lru) {
4441		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
4442		if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn)
4443			continue;
4444		if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
4445			t = dp->dl_time - cutoff;
4446			new_timeo = min(new_timeo, t);
4447			break;
4448		}
4449		WARN_ON(!unhash_delegation_locked(dp));
4450		list_add(&dp->dl_recall_lru, &reaplist);
4451	}
4452	spin_unlock(&state_lock);
4453	while (!list_empty(&reaplist)) {
4454		dp = list_first_entry(&reaplist, struct nfs4_delegation,
4455					dl_recall_lru);
4456		list_del_init(&dp->dl_recall_lru);
4457		revoke_delegation(dp);
4458	}
4459
4460	spin_lock(&nn->client_lock);
4461	while (!list_empty(&nn->close_lru)) {
4462		oo = list_first_entry(&nn->close_lru, struct nfs4_openowner,
4463					oo_close_lru);
4464		if (time_after((unsigned long)oo->oo_time,
4465			       (unsigned long)cutoff)) {
4466			t = oo->oo_time - cutoff;
4467			new_timeo = min(new_timeo, t);
4468			break;
4469		}
4470		list_del_init(&oo->oo_close_lru);
4471		stp = oo->oo_last_closed_stid;
4472		oo->oo_last_closed_stid = NULL;
4473		spin_unlock(&nn->client_lock);
4474		nfs4_put_stid(&stp->st_stid);
4475		spin_lock(&nn->client_lock);
4476	}
4477	spin_unlock(&nn->client_lock);
4478
4479	new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
4480	return new_timeo;
4481}
4482
4483static struct workqueue_struct *laundry_wq;
4484static void laundromat_main(struct work_struct *);
4485
4486static void
4487laundromat_main(struct work_struct *laundry)
4488{
4489	time_t t;
4490	struct delayed_work *dwork = container_of(laundry, struct delayed_work,
4491						  work);
4492	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
4493					   laundromat_work);
4494
4495	t = nfs4_laundromat(nn);
4496	dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
4497	queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
4498}
4499
4500static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
4501{
4502	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
4503		return nfserr_bad_stateid;
4504	return nfs_ok;
4505}
4506
4507static inline int
4508access_permit_read(struct nfs4_ol_stateid *stp)
4509{
4510	return test_access(NFS4_SHARE_ACCESS_READ, stp) ||
4511		test_access(NFS4_SHARE_ACCESS_BOTH, stp) ||
4512		test_access(NFS4_SHARE_ACCESS_WRITE, stp);
4513}
4514
4515static inline int
4516access_permit_write(struct nfs4_ol_stateid *stp)
4517{
4518	return test_access(NFS4_SHARE_ACCESS_WRITE, stp) ||
4519		test_access(NFS4_SHARE_ACCESS_BOTH, stp);
4520}
4521
4522static
4523__be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags)
4524{
4525        __be32 status = nfserr_openmode;
4526
4527	/* For lock stateid's, we test the parent open, not the lock: */
4528	if (stp->st_openstp)
4529		stp = stp->st_openstp;
4530	if ((flags & WR_STATE) && !access_permit_write(stp))
4531                goto out;
4532	if ((flags & RD_STATE) && !access_permit_read(stp))
4533                goto out;
4534	status = nfs_ok;
4535out:
4536	return status;
4537}
4538
4539static inline __be32
4540check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, int flags)
4541{
4542	if (ONE_STATEID(stateid) && (flags & RD_STATE))
4543		return nfs_ok;
4544	else if (locks_in_grace(net)) {
4545		/* Answer in remaining cases depends on existence of
4546		 * conflicting state; so we must wait out the grace period. */
4547		return nfserr_grace;
4548	} else if (flags & WR_STATE)
4549		return nfs4_share_conflict(current_fh,
4550				NFS4_SHARE_DENY_WRITE);
4551	else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */
4552		return nfs4_share_conflict(current_fh,
4553				NFS4_SHARE_DENY_READ);
4554}
4555
4556/*
4557 * Allow READ/WRITE during grace period on recovered state only for files
4558 * that are not able to provide mandatory locking.
4559 */
4560static inline int
4561grace_disallows_io(struct net *net, struct inode *inode)
4562{
4563	return locks_in_grace(net) && mandatory_lock(inode);
4564}
4565
4566/* Returns true iff a is later than b: */
4567static bool stateid_generation_after(stateid_t *a, stateid_t *b)
4568{
4569	return (s32)(a->si_generation - b->si_generation) > 0;
4570}
4571
4572static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session)
4573{
4574	/*
4575	 * When sessions are used the stateid generation number is ignored
4576	 * when it is zero.
4577	 */
4578	if (has_session && in->si_generation == 0)
4579		return nfs_ok;
4580
4581	if (in->si_generation == ref->si_generation)
4582		return nfs_ok;
4583
4584	/* If the client sends us a stateid from the future, it's buggy: */
4585	if (stateid_generation_after(in, ref))
4586		return nfserr_bad_stateid;
4587	/*
4588	 * However, we could see a stateid from the past, even from a
4589	 * non-buggy client.  For example, if the client sends a lock
4590	 * while some IO is outstanding, the lock may bump si_generation
4591	 * while the IO is still in flight.  The client could avoid that
4592	 * situation by waiting for responses on all the IO requests,
4593	 * but better performance may result in retrying IO that
4594	 * receives an old_stateid error if requests are rarely
4595	 * reordered in flight:
4596	 */
4597	return nfserr_old_stateid;
4598}
4599
4600static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols)
4601{
4602	if (ols->st_stateowner->so_is_open_owner &&
4603	    !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
4604		return nfserr_bad_stateid;
4605	return nfs_ok;
4606}
4607
4608static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
4609{
4610	struct nfs4_stid *s;
4611	__be32 status = nfserr_bad_stateid;
4612
4613	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
4614		return status;
4615	/* Client debugging aid. */
4616	if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) {
4617		char addr_str[INET6_ADDRSTRLEN];
4618		rpc_ntop((struct sockaddr *)&cl->cl_addr, addr_str,
4619				 sizeof(addr_str));
4620		pr_warn_ratelimited("NFSD: client %s testing state ID "
4621					"with incorrect client ID\n", addr_str);
4622		return status;
4623	}
4624	spin_lock(&cl->cl_lock);
4625	s = find_stateid_locked(cl, stateid);
4626	if (!s)
4627		goto out_unlock;
4628	status = check_stateid_generation(stateid, &s->sc_stateid, 1);
4629	if (status)
4630		goto out_unlock;
4631	switch (s->sc_type) {
4632	case NFS4_DELEG_STID:
4633		status = nfs_ok;
4634		break;
4635	case NFS4_REVOKED_DELEG_STID:
4636		status = nfserr_deleg_revoked;
4637		break;
4638	case NFS4_OPEN_STID:
4639	case NFS4_LOCK_STID:
4640		status = nfsd4_check_openowner_confirmed(openlockstateid(s));
4641		break;
4642	default:
4643		printk("unknown stateid type %x\n", s->sc_type);
4644		/* Fallthrough */
4645	case NFS4_CLOSED_STID:
4646	case NFS4_CLOSED_DELEG_STID:
4647		status = nfserr_bad_stateid;
4648	}
4649out_unlock:
4650	spin_unlock(&cl->cl_lock);
4651	return status;
4652}
4653
4654__be32
4655nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
4656		     stateid_t *stateid, unsigned char typemask,
4657		     struct nfs4_stid **s, struct nfsd_net *nn)
4658{
4659	__be32 status;
4660
4661	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
4662		return nfserr_bad_stateid;
4663	status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn);
4664	if (status == nfserr_stale_clientid) {
4665		if (cstate->session)
4666			return nfserr_bad_stateid;
4667		return nfserr_stale_stateid;
4668	}
4669	if (status)
4670		return status;
4671	*s = find_stateid_by_type(cstate->clp, stateid, typemask);
4672	if (!*s)
4673		return nfserr_bad_stateid;
4674	return nfs_ok;
4675}
4676
4677static struct file *
4678nfs4_find_file(struct nfs4_stid *s, int flags)
4679{
4680	switch (s->sc_type) {
4681	case NFS4_DELEG_STID:
4682		if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
4683			return NULL;
4684		return get_file(s->sc_file->fi_deleg_file);
4685	case NFS4_OPEN_STID:
4686	case NFS4_LOCK_STID:
4687		if (flags & RD_STATE)
4688			return find_readable_file(s->sc_file);
4689		else
4690			return find_writeable_file(s->sc_file);
4691		break;
4692	}
4693
4694	return NULL;
4695}
4696
4697static __be32
4698nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags)
4699{
4700	__be32 status;
4701
4702	status = nfsd4_check_openowner_confirmed(ols);
4703	if (status)
4704		return status;
4705	return nfs4_check_openmode(ols, flags);
4706}
4707
4708/*
4709 * Checks for stateid operations
4710 */
4711__be32
4712nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
4713			   stateid_t *stateid, int flags, struct file **filpp)
4714{
4715	struct svc_fh *fhp = &cstate->current_fh;
4716	struct inode *ino = d_inode(fhp->fh_dentry);
4717	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
4718	struct nfs4_stid *s;
4719	__be32 status;
4720
4721	if (filpp)
4722		*filpp = NULL;
4723
4724	if (grace_disallows_io(net, ino))
4725		return nfserr_grace;
4726
4727	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
4728		return check_special_stateids(net, fhp, stateid, flags);
4729
4730	status = nfsd4_lookup_stateid(cstate, stateid,
4731				NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
4732				&s, nn);
4733	if (status)
4734		return status;
4735	status = check_stateid_generation(stateid, &s->sc_stateid,
4736			nfsd4_has_session(cstate));
4737	if (status)
4738		goto out;
4739
4740	switch (s->sc_type) {
4741	case NFS4_DELEG_STID:
4742		status = nfs4_check_delegmode(delegstateid(s), flags);
4743		break;
4744	case NFS4_OPEN_STID:
4745	case NFS4_LOCK_STID:
4746		status = nfs4_check_olstateid(fhp, openlockstateid(s), flags);
4747		break;
4748	default:
4749		status = nfserr_bad_stateid;
4750		break;
4751	}
4752	if (status)
4753		goto out;
4754	status = nfs4_check_fh(fhp, s);
4755
4756	if (!status && filpp) {
4757		*filpp = nfs4_find_file(s, flags);
4758		if (!*filpp)
4759			status = nfserr_serverfault;
4760	}
4761out:
4762	nfs4_put_stid(s);
4763	return status;
4764}
4765
4766/*
4767 * Test if the stateid is valid
4768 */
4769__be32
4770nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4771		   struct nfsd4_test_stateid *test_stateid)
4772{
4773	struct nfsd4_test_stateid_id *stateid;
4774	struct nfs4_client *cl = cstate->session->se_client;
4775
4776	list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list)
4777		stateid->ts_id_status =
4778			nfsd4_validate_stateid(cl, &stateid->ts_id_stateid);
4779
4780	return nfs_ok;
4781}
4782
4783__be32
4784nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4785		   struct nfsd4_free_stateid *free_stateid)
4786{
4787	stateid_t *stateid = &free_stateid->fr_stateid;
4788	struct nfs4_stid *s;
4789	struct nfs4_delegation *dp;
4790	struct nfs4_ol_stateid *stp;
4791	struct nfs4_client *cl = cstate->session->se_client;
4792	__be32 ret = nfserr_bad_stateid;
4793
4794	spin_lock(&cl->cl_lock);
4795	s = find_stateid_locked(cl, stateid);
4796	if (!s)
4797		goto out_unlock;
4798	switch (s->sc_type) {
4799	case NFS4_DELEG_STID:
4800		ret = nfserr_locks_held;
4801		break;
4802	case NFS4_OPEN_STID:
4803		ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
4804		if (ret)
4805			break;
4806		ret = nfserr_locks_held;
4807		break;
4808	case NFS4_LOCK_STID:
4809		ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
4810		if (ret)
4811			break;
4812		stp = openlockstateid(s);
4813		ret = nfserr_locks_held;
4814		if (check_for_locks(stp->st_stid.sc_file,
4815				    lockowner(stp->st_stateowner)))
4816			break;
4817		WARN_ON(!unhash_lock_stateid(stp));
4818		spin_unlock(&cl->cl_lock);
4819		nfs4_put_stid(s);
4820		ret = nfs_ok;
4821		goto out;
4822	case NFS4_REVOKED_DELEG_STID:
4823		dp = delegstateid(s);
4824		list_del_init(&dp->dl_recall_lru);
4825		spin_unlock(&cl->cl_lock);
4826		nfs4_put_stid(s);
4827		ret = nfs_ok;
4828		goto out;
4829	/* Default falls through and returns nfserr_bad_stateid */
4830	}
4831out_unlock:
4832	spin_unlock(&cl->cl_lock);
4833out:
4834	return ret;
4835}
4836
4837static inline int
4838setlkflg (int type)
4839{
4840	return (type == NFS4_READW_LT || type == NFS4_READ_LT) ?
4841		RD_STATE : WR_STATE;
4842}
4843
4844static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_t *stateid, u32 seqid, struct nfs4_ol_stateid *stp)
4845{
4846	struct svc_fh *current_fh = &cstate->current_fh;
4847	struct nfs4_stateowner *sop = stp->st_stateowner;
4848	__be32 status;
4849
4850	status = nfsd4_check_seqid(cstate, sop, seqid);
4851	if (status)
4852		return status;
4853	if (stp->st_stid.sc_type == NFS4_CLOSED_STID
4854		|| stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID)
4855		/*
4856		 * "Closed" stateid's exist *only* to return
4857		 * nfserr_replay_me from the previous step, and
4858		 * revoked delegations are kept only for free_stateid.
4859		 */
4860		return nfserr_bad_stateid;
4861	down_write(&stp->st_rwsem);
4862	status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
4863	if (status == nfs_ok)
4864		status = nfs4_check_fh(current_fh, &stp->st_stid);
4865	if (status != nfs_ok)
4866		up_write(&stp->st_rwsem);
4867	return status;
4868}
4869
4870/*
4871 * Checks for sequence id mutating operations.
4872 */
4873static __be32
4874nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
4875			 stateid_t *stateid, char typemask,
4876			 struct nfs4_ol_stateid **stpp,
4877			 struct nfsd_net *nn)
4878{
4879	__be32 status;
4880	struct nfs4_stid *s;
4881	struct nfs4_ol_stateid *stp = NULL;
4882
4883	dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
4884		seqid, STATEID_VAL(stateid));
4885
4886	*stpp = NULL;
4887	status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
4888	if (status)
4889		return status;
4890	stp = openlockstateid(s);
4891	nfsd4_cstate_assign_replay(cstate, stp->st_stateowner);
4892
4893	status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp);
4894	if (!status)
4895		*stpp = stp;
4896	else
4897		nfs4_put_stid(&stp->st_stid);
4898	return status;
4899}
4900
4901static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
4902						 stateid_t *stateid, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn)
4903{
4904	__be32 status;
4905	struct nfs4_openowner *oo;
4906	struct nfs4_ol_stateid *stp;
4907
4908	status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
4909						NFS4_OPEN_STID, &stp, nn);
4910	if (status)
4911		return status;
4912	oo = openowner(stp->st_stateowner);
4913	if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
4914		up_write(&stp->st_rwsem);
4915		nfs4_put_stid(&stp->st_stid);
4916		return nfserr_bad_stateid;
4917	}
4918	*stpp = stp;
4919	return nfs_ok;
4920}
4921
4922__be32
4923nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4924		   struct nfsd4_open_confirm *oc)
4925{
4926	__be32 status;
4927	struct nfs4_openowner *oo;
4928	struct nfs4_ol_stateid *stp;
4929	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
4930
4931	dprintk("NFSD: nfsd4_open_confirm on file %pd\n",
4932			cstate->current_fh.fh_dentry);
4933
4934	status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
4935	if (status)
4936		return status;
4937
4938	status = nfs4_preprocess_seqid_op(cstate,
4939					oc->oc_seqid, &oc->oc_req_stateid,
4940					NFS4_OPEN_STID, &stp, nn);
4941	if (status)
4942		goto out;
4943	oo = openowner(stp->st_stateowner);
4944	status = nfserr_bad_stateid;
4945	if (oo->oo_flags & NFS4_OO_CONFIRMED) {
4946		up_write(&stp->st_rwsem);
4947		goto put_stateid;
4948	}
4949	oo->oo_flags |= NFS4_OO_CONFIRMED;
4950	update_stateid(&stp->st_stid.sc_stateid);
4951	memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4952	up_write(&stp->st_rwsem);
4953	dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
4954		__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
4955
4956	nfsd4_client_record_create(oo->oo_owner.so_client);
4957	status = nfs_ok;
4958put_stateid:
4959	nfs4_put_stid(&stp->st_stid);
4960out:
4961	nfsd4_bump_seqid(cstate, status);
4962	return status;
4963}
4964
4965static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access)
4966{
4967	if (!test_access(access, stp))
4968		return;
4969	nfs4_file_put_access(stp->st_stid.sc_file, access);
4970	clear_access(access, stp);
4971}
4972
4973static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access)
4974{
4975	switch (to_access) {
4976	case NFS4_SHARE_ACCESS_READ:
4977		nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_WRITE);
4978		nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH);
4979		break;
4980	case NFS4_SHARE_ACCESS_WRITE:
4981		nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_READ);
4982		nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH);
4983		break;
4984	case NFS4_SHARE_ACCESS_BOTH:
4985		break;
4986	default:
4987		WARN_ON_ONCE(1);
4988	}
4989}
4990
4991__be32
4992nfsd4_open_downgrade(struct svc_rqst *rqstp,
4993		     struct nfsd4_compound_state *cstate,
4994		     struct nfsd4_open_downgrade *od)
4995{
4996	__be32 status;
4997	struct nfs4_ol_stateid *stp;
4998	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
4999
5000	dprintk("NFSD: nfsd4_open_downgrade on file %pd\n",
5001			cstate->current_fh.fh_dentry);
5002
5003	/* We don't yet support WANT bits: */
5004	if (od->od_deleg_want)
5005		dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__,
5006			od->od_deleg_want);
5007
5008	status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
5009					&od->od_stateid, &stp, nn);
5010	if (status)
5011		goto out;
5012	status = nfserr_inval;
5013	if (!test_access(od->od_share_access, stp)) {
5014		dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n",
5015			stp->st_access_bmap, od->od_share_access);
5016		goto put_stateid;
5017	}
5018	if (!test_deny(od->od_share_deny, stp)) {
5019		dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n",
5020			stp->st_deny_bmap, od->od_share_deny);
5021		goto put_stateid;
5022	}
5023	nfs4_stateid_downgrade(stp, od->od_share_access);
5024
5025	reset_union_bmap_deny(od->od_share_deny, stp);
5026
5027	update_stateid(&stp->st_stid.sc_stateid);
5028	memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
5029	status = nfs_ok;
5030put_stateid:
5031	up_write(&stp->st_rwsem);
5032	nfs4_put_stid(&stp->st_stid);
5033out:
5034	nfsd4_bump_seqid(cstate, status);
5035	return status;
5036}
5037
5038static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
5039{
5040	struct nfs4_client *clp = s->st_stid.sc_client;
5041	bool unhashed;
5042	LIST_HEAD(reaplist);
5043
5044	s->st_stid.sc_type = NFS4_CLOSED_STID;
5045	spin_lock(&clp->cl_lock);
5046	unhashed = unhash_open_stateid(s, &reaplist);
5047
5048	if (clp->cl_minorversion) {
5049		if (unhashed)
5050			put_ol_stateid_locked(s, &reaplist);
5051		spin_unlock(&clp->cl_lock);
5052		free_ol_stateid_reaplist(&reaplist);
5053	} else {
5054		spin_unlock(&clp->cl_lock);
5055		free_ol_stateid_reaplist(&reaplist);
5056		if (unhashed)
5057			move_to_close_lru(s, clp->net);
5058	}
5059}
5060
5061/*
5062 * nfs4_unlock_state() called after encode
5063 */
5064__be32
5065nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5066	    struct nfsd4_close *close)
5067{
5068	__be32 status;
5069	struct nfs4_ol_stateid *stp;
5070	struct net *net = SVC_NET(rqstp);
5071	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
5072
5073	dprintk("NFSD: nfsd4_close on file %pd\n",
5074			cstate->current_fh.fh_dentry);
5075
5076	status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
5077					&close->cl_stateid,
5078					NFS4_OPEN_STID|NFS4_CLOSED_STID,
5079					&stp, nn);
5080	nfsd4_bump_seqid(cstate, status);
5081	if (status)
5082		goto out;
5083	update_stateid(&stp->st_stid.sc_stateid);
5084	memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
5085	up_write(&stp->st_rwsem);
5086
5087	nfsd4_close_open_stateid(stp);
5088
5089	/* put reference from nfs4_preprocess_seqid_op */
5090	nfs4_put_stid(&stp->st_stid);
5091out:
5092	return status;
5093}
5094
5095__be32
5096nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5097		  struct nfsd4_delegreturn *dr)
5098{
5099	struct nfs4_delegation *dp;
5100	stateid_t *stateid = &dr->dr_stateid;
5101	struct nfs4_stid *s;
5102	__be32 status;
5103	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
5104
5105	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
5106		return status;
5107
5108	status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
5109	if (status)
5110		goto out;
5111	dp = delegstateid(s);
5112	status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate));
5113	if (status)
5114		goto put_stateid;
5115
5116	destroy_delegation(dp);
5117put_stateid:
5118	nfs4_put_stid(&dp->dl_stid);
5119out:
5120	return status;
5121}
5122
5123
5124#define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start))
5125
5126static inline u64
5127end_offset(u64 start, u64 len)
5128{
5129	u64 end;
5130
5131	end = start + len;
5132	return end >= start ? end: NFS4_MAX_UINT64;
5133}
5134
5135/* last octet in a range */
5136static inline u64
5137last_byte_offset(u64 start, u64 len)
5138{
5139	u64 end;
5140
5141	WARN_ON_ONCE(!len);
5142	end = start + len;
5143	return end > start ? end - 1: NFS4_MAX_UINT64;
5144}
5145
5146/*
5147 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
5148 * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
5149 * byte, because of sign extension problems.  Since NFSv4 calls for 64-bit
5150 * locking, this prevents us from being completely protocol-compliant.  The
5151 * real solution to this problem is to start using unsigned file offsets in
5152 * the VFS, but this is a very deep change!
5153 */
5154static inline void
5155nfs4_transform_lock_offset(struct file_lock *lock)
5156{
5157	if (lock->fl_start < 0)
5158		lock->fl_start = OFFSET_MAX;
5159	if (lock->fl_end < 0)
5160		lock->fl_end = OFFSET_MAX;
5161}
5162
5163static fl_owner_t
5164nfsd4_fl_get_owner(fl_owner_t owner)
5165{
5166	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
5167
5168	nfs4_get_stateowner(&lo->lo_owner);
5169	return owner;
5170}
5171
5172static void
5173nfsd4_fl_put_owner(fl_owner_t owner)
5174{
5175	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
5176
5177	if (lo)
5178		nfs4_put_stateowner(&lo->lo_owner);
5179}
5180
5181static const struct lock_manager_operations nfsd_posix_mng_ops  = {
5182	.lm_get_owner = nfsd4_fl_get_owner,
5183	.lm_put_owner = nfsd4_fl_put_owner,
5184};
5185
5186static inline void
5187nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
5188{
5189	struct nfs4_lockowner *lo;
5190
5191	if (fl->fl_lmops == &nfsd_posix_mng_ops) {
5192		lo = (struct nfs4_lockowner *) fl->fl_owner;
5193		deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data,
5194					lo->lo_owner.so_owner.len, GFP_KERNEL);
5195		if (!deny->ld_owner.data)
5196			/* We just don't care that much */
5197			goto nevermind;
5198		deny->ld_owner.len = lo->lo_owner.so_owner.len;
5199		deny->ld_clientid = lo->lo_owner.so_client->cl_clientid;
5200	} else {
5201nevermind:
5202		deny->ld_owner.len = 0;
5203		deny->ld_owner.data = NULL;
5204		deny->ld_clientid.cl_boot = 0;
5205		deny->ld_clientid.cl_id = 0;
5206	}
5207	deny->ld_start = fl->fl_start;
5208	deny->ld_length = NFS4_MAX_UINT64;
5209	if (fl->fl_end != NFS4_MAX_UINT64)
5210		deny->ld_length = fl->fl_end - fl->fl_start + 1;
5211	deny->ld_type = NFS4_READ_LT;
5212	if (fl->fl_type != F_RDLCK)
5213		deny->ld_type = NFS4_WRITE_LT;
5214}
5215
5216static struct nfs4_lockowner *
5217find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner,
5218		struct nfs4_client *clp)
5219{
5220	unsigned int strhashval = ownerstr_hashval(owner);
5221	struct nfs4_stateowner *so;
5222
5223	lockdep_assert_held(&clp->cl_lock);
5224
5225	list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[strhashval],
5226			    so_strhash) {
5227		if (so->so_is_open_owner)
5228			continue;
5229		if (same_owner_str(so, owner))
5230			return lockowner(nfs4_get_stateowner(so));
5231	}
5232	return NULL;
5233}
5234
5235static struct nfs4_lockowner *
5236find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner,
5237		struct nfs4_client *clp)
5238{
5239	struct nfs4_lockowner *lo;
5240
5241	spin_lock(&clp->cl_lock);
5242	lo = find_lockowner_str_locked(clid, owner, clp);
5243	spin_unlock(&clp->cl_lock);
5244	return lo;
5245}
5246
5247static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop)
5248{
5249	unhash_lockowner_locked(lockowner(sop));
5250}
5251
5252static void nfs4_free_lockowner(struct nfs4_stateowner *sop)
5253{
5254	struct nfs4_lockowner *lo = lockowner(sop);
5255
5256	kmem_cache_free(lockowner_slab, lo);
5257}
5258
5259static const struct nfs4_stateowner_operations lockowner_ops = {
5260	.so_unhash =	nfs4_unhash_lockowner,
5261	.so_free =	nfs4_free_lockowner,
5262};
5263
5264/*
5265 * Alloc a lock owner structure.
5266 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
5267 * occurred.
5268 *
5269 * strhashval = ownerstr_hashval
5270 */
5271static struct nfs4_lockowner *
5272alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
5273			   struct nfs4_ol_stateid *open_stp,
5274			   struct nfsd4_lock *lock)
5275{
5276	struct nfs4_lockowner *lo, *ret;
5277
5278	lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
5279	if (!lo)
5280		return NULL;
5281	INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
5282	lo->lo_owner.so_is_open_owner = 0;
5283	lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
5284	lo->lo_owner.so_ops = &lockowner_ops;
5285	spin_lock(&clp->cl_lock);
5286	ret = find_lockowner_str_locked(&clp->cl_clientid,
5287			&lock->lk_new_owner, clp);
5288	if (ret == NULL) {
5289		list_add(&lo->lo_owner.so_strhash,
5290			 &clp->cl_ownerstr_hashtbl[strhashval]);
5291		ret = lo;
5292	} else
5293		nfs4_free_lockowner(&lo->lo_owner);
5294	spin_unlock(&clp->cl_lock);
5295	return ret;
5296}
5297
5298static void
5299init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
5300		  struct nfs4_file *fp, struct inode *inode,
5301		  struct nfs4_ol_stateid *open_stp)
5302{
5303	struct nfs4_client *clp = lo->lo_owner.so_client;
5304
5305	lockdep_assert_held(&clp->cl_lock);
5306
5307	atomic_inc(&stp->st_stid.sc_count);
5308	stp->st_stid.sc_type = NFS4_LOCK_STID;
5309	stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
5310	get_nfs4_file(fp);
5311	stp->st_stid.sc_file = fp;
5312	stp->st_stid.sc_free = nfs4_free_lock_stateid;
5313	stp->st_access_bmap = 0;
5314	stp->st_deny_bmap = open_stp->st_deny_bmap;
5315	stp->st_openstp = open_stp;
5316	init_rwsem(&stp->st_rwsem);
5317	list_add(&stp->st_locks, &open_stp->st_locks);
5318	list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
5319	spin_lock(&fp->fi_lock);
5320	list_add(&stp->st_perfile, &fp->fi_stateids);
5321	spin_unlock(&fp->fi_lock);
5322}
5323
5324static struct nfs4_ol_stateid *
5325find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp)
5326{
5327	struct nfs4_ol_stateid *lst;
5328	struct nfs4_client *clp = lo->lo_owner.so_client;
5329
5330	lockdep_assert_held(&clp->cl_lock);
5331
5332	list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) {
5333		if (lst->st_stid.sc_file == fp) {
5334			atomic_inc(&lst->st_stid.sc_count);
5335			return lst;
5336		}
5337	}
5338	return NULL;
5339}
5340
5341static struct nfs4_ol_stateid *
5342find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
5343			    struct inode *inode, struct nfs4_ol_stateid *ost,
5344			    bool *new)
5345{
5346	struct nfs4_stid *ns = NULL;
5347	struct nfs4_ol_stateid *lst;
5348	struct nfs4_openowner *oo = openowner(ost->st_stateowner);
5349	struct nfs4_client *clp = oo->oo_owner.so_client;
5350
5351	spin_lock(&clp->cl_lock);
5352	lst = find_lock_stateid(lo, fi);
5353	if (lst == NULL) {
5354		spin_unlock(&clp->cl_lock);
5355		ns = nfs4_alloc_stid(clp, stateid_slab);
5356		if (ns == NULL)
5357			return NULL;
5358
5359		spin_lock(&clp->cl_lock);
5360		lst = find_lock_stateid(lo, fi);
5361		if (likely(!lst)) {
5362			lst = openlockstateid(ns);
5363			init_lock_stateid(lst, lo, fi, inode, ost);
5364			ns = NULL;
5365			*new = true;
5366		}
5367	}
5368	spin_unlock(&clp->cl_lock);
5369	if (ns)
5370		nfs4_put_stid(ns);
5371	return lst;
5372}
5373
5374static int
5375check_lock_length(u64 offset, u64 length)
5376{
5377	return ((length == 0)  || ((length != NFS4_MAX_UINT64) &&
5378	     LOFF_OVERFLOW(offset, length)));
5379}
5380
5381static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
5382{
5383	struct nfs4_file *fp = lock_stp->st_stid.sc_file;
5384
5385	lockdep_assert_held(&fp->fi_lock);
5386
5387	if (test_access(access, lock_stp))
5388		return;
5389	__nfs4_file_get_access(fp, access);
5390	set_access(access, lock_stp);
5391}
5392
5393static __be32
5394lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
5395			    struct nfs4_ol_stateid *ost,
5396			    struct nfsd4_lock *lock,
5397			    struct nfs4_ol_stateid **lst, bool *new)
5398{
5399	__be32 status;
5400	struct nfs4_file *fi = ost->st_stid.sc_file;
5401	struct nfs4_openowner *oo = openowner(ost->st_stateowner);
5402	struct nfs4_client *cl = oo->oo_owner.so_client;
5403	struct inode *inode = d_inode(cstate->current_fh.fh_dentry);
5404	struct nfs4_lockowner *lo;
5405	unsigned int strhashval;
5406
5407	lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, cl);
5408	if (!lo) {
5409		strhashval = ownerstr_hashval(&lock->v.new.owner);
5410		lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
5411		if (lo == NULL)
5412			return nfserr_jukebox;
5413	} else {
5414		/* with an existing lockowner, seqids must be the same */
5415		status = nfserr_bad_seqid;
5416		if (!cstate->minorversion &&
5417		    lock->lk_new_lock_seqid != lo->lo_owner.so_seqid)
5418			goto out;
5419	}
5420
5421	*lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
5422	if (*lst == NULL) {
5423		status = nfserr_jukebox;
5424		goto out;
5425	}
5426	status = nfs_ok;
5427out:
5428	nfs4_put_stateowner(&lo->lo_owner);
5429	return status;
5430}
5431
5432/*
5433 *  LOCK operation
5434 */
5435__be32
5436nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5437	   struct nfsd4_lock *lock)
5438{
5439	struct nfs4_openowner *open_sop = NULL;
5440	struct nfs4_lockowner *lock_sop = NULL;
5441	struct nfs4_ol_stateid *lock_stp = NULL;
5442	struct nfs4_ol_stateid *open_stp = NULL;
5443	struct nfs4_file *fp;
5444	struct file *filp = NULL;
5445	struct file_lock *file_lock = NULL;
5446	struct file_lock *conflock = NULL;
5447	__be32 status = 0;
5448	int lkflg;
5449	int err;
5450	bool new = false;
5451	struct net *net = SVC_NET(rqstp);
5452	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
5453
5454	dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
5455		(long long) lock->lk_offset,
5456		(long long) lock->lk_length);
5457
5458	if (check_lock_length(lock->lk_offset, lock->lk_length))
5459		 return nfserr_inval;
5460
5461	if ((status = fh_verify(rqstp, &cstate->current_fh,
5462				S_IFREG, NFSD_MAY_LOCK))) {
5463		dprintk("NFSD: nfsd4_lock: permission denied!\n");
5464		return status;
5465	}
5466
5467	if (lock->lk_is_new) {
5468		if (nfsd4_has_session(cstate))
5469			/* See rfc 5661 18.10.3: given clientid is ignored: */
5470			memcpy(&lock->v.new.clientid,
5471				&cstate->session->se_client->cl_clientid,
5472				sizeof(clientid_t));
5473
5474		status = nfserr_stale_clientid;
5475		if (STALE_CLIENTID(&lock->lk_new_clientid, nn))
5476			goto out;
5477
5478		/* validate and update open stateid and open seqid */
5479		status = nfs4_preprocess_confirmed_seqid_op(cstate,
5480				        lock->lk_new_open_seqid,
5481		                        &lock->lk_new_open_stateid,
5482					&open_stp, nn);
5483		if (status)
5484			goto out;
5485		up_write(&open_stp->st_rwsem);
5486		open_sop = openowner(open_stp->st_stateowner);
5487		status = nfserr_bad_stateid;
5488		if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
5489						&lock->v.new.clientid))
5490			goto out;
5491		status = lookup_or_create_lock_state(cstate, open_stp, lock,
5492							&lock_stp, &new);
5493		if (status == nfs_ok)
5494			down_write(&lock_stp->st_rwsem);
5495	} else {
5496		status = nfs4_preprocess_seqid_op(cstate,
5497				       lock->lk_old_lock_seqid,
5498				       &lock->lk_old_lock_stateid,
5499				       NFS4_LOCK_STID, &lock_stp, nn);
5500	}
5501	if (status)
5502		goto out;
5503	lock_sop = lockowner(lock_stp->st_stateowner);
5504
5505	lkflg = setlkflg(lock->lk_type);
5506	status = nfs4_check_openmode(lock_stp, lkflg);
5507	if (status)
5508		goto out;
5509
5510	status = nfserr_grace;
5511	if (locks_in_grace(net) && !lock->lk_reclaim)
5512		goto out;
5513	status = nfserr_no_grace;
5514	if (!locks_in_grace(net) && lock->lk_reclaim)
5515		goto out;
5516
5517	file_lock = locks_alloc_lock();
5518	if (!file_lock) {
5519		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
5520		status = nfserr_jukebox;
5521		goto out;
5522	}
5523
5524	fp = lock_stp->st_stid.sc_file;
5525	switch (lock->lk_type) {
5526		case NFS4_READ_LT:
5527		case NFS4_READW_LT:
5528			spin_lock(&fp->fi_lock);
5529			filp = find_readable_file_locked(fp);
5530			if (filp)
5531				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
5532			spin_unlock(&fp->fi_lock);
5533			file_lock->fl_type = F_RDLCK;
5534			break;
5535		case NFS4_WRITE_LT:
5536		case NFS4_WRITEW_LT:
5537			spin_lock(&fp->fi_lock);
5538			filp = find_writeable_file_locked(fp);
5539			if (filp)
5540				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
5541			spin_unlock(&fp->fi_lock);
5542			file_lock->fl_type = F_WRLCK;
5543			break;
5544		default:
5545			status = nfserr_inval;
5546		goto out;
5547	}
5548	if (!filp) {
5549		status = nfserr_openmode;
5550		goto out;
5551	}
5552
5553	file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
5554	file_lock->fl_pid = current->tgid;
5555	file_lock->fl_file = filp;
5556	file_lock->fl_flags = FL_POSIX;
5557	file_lock->fl_lmops = &nfsd_posix_mng_ops;
5558	file_lock->fl_start = lock->lk_offset;
5559	file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
5560	nfs4_transform_lock_offset(file_lock);
5561
5562	conflock = locks_alloc_lock();
5563	if (!conflock) {
5564		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
5565		status = nfserr_jukebox;
5566		goto out;
5567	}
5568
5569	err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
5570	switch (-err) {
5571	case 0: /* success! */
5572		update_stateid(&lock_stp->st_stid.sc_stateid);
5573		memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid,
5574				sizeof(stateid_t));
5575		status = 0;
5576		break;
5577	case (EAGAIN):		/* conflock holds conflicting lock */
5578		status = nfserr_denied;
5579		dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
5580		nfs4_set_lock_denied(conflock, &lock->lk_denied);
5581		break;
5582	case (EDEADLK):
5583		status = nfserr_deadlock;
5584		break;
5585	default:
5586		dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
5587		status = nfserrno(err);
5588		break;
5589	}
5590out:
5591	if (filp)
5592		fput(filp);
5593	if (lock_stp) {
5594		/* Bump seqid manually if the 4.0 replay owner is openowner */
5595		if (cstate->replay_owner &&
5596		    cstate->replay_owner != &lock_sop->lo_owner &&
5597		    seqid_mutating_err(ntohl(status)))
5598			lock_sop->lo_owner.so_seqid++;
5599
5600		up_write(&lock_stp->st_rwsem);
5601
5602		/*
5603		 * If this is a new, never-before-used stateid, and we are
5604		 * returning an error, then just go ahead and release it.
5605		 */
5606		if (status && new)
5607			release_lock_stateid(lock_stp);
5608
5609		nfs4_put_stid(&lock_stp->st_stid);
5610	}
5611	if (open_stp)
5612		nfs4_put_stid(&open_stp->st_stid);
5613	nfsd4_bump_seqid(cstate, status);
5614	if (file_lock)
5615		locks_free_lock(file_lock);
5616	if (conflock)
5617		locks_free_lock(conflock);
5618	return status;
5619}
5620
5621/*
5622 * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
5623 * so we do a temporary open here just to get an open file to pass to
5624 * vfs_test_lock.  (Arguably perhaps test_lock should be done with an
5625 * inode operation.)
5626 */
5627static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
5628{
5629	struct file *file;
5630	__be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
5631	if (!err) {
5632		err = nfserrno(vfs_test_lock(file, lock));
5633		nfsd_close(file);
5634	}
5635	return err;
5636}
5637
5638/*
5639 * LOCKT operation
5640 */
5641__be32
5642nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5643	    struct nfsd4_lockt *lockt)
5644{
5645	struct file_lock *file_lock = NULL;
5646	struct nfs4_lockowner *lo = NULL;
5647	__be32 status;
5648	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
5649
5650	if (locks_in_grace(SVC_NET(rqstp)))
5651		return nfserr_grace;
5652
5653	if (check_lock_length(lockt->lt_offset, lockt->lt_length))
5654		 return nfserr_inval;
5655
5656	if (!nfsd4_has_session(cstate)) {
5657		status = lookup_clientid(&lockt->lt_clientid, cstate, nn);
5658		if (status)
5659			goto out;
5660	}
5661
5662	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
5663		goto out;
5664
5665	file_lock = locks_alloc_lock();
5666	if (!file_lock) {
5667		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
5668		status = nfserr_jukebox;
5669		goto out;
5670	}
5671
5672	switch (lockt->lt_type) {
5673		case NFS4_READ_LT:
5674		case NFS4_READW_LT:
5675			file_lock->fl_type = F_RDLCK;
5676		break;
5677		case NFS4_WRITE_LT:
5678		case NFS4_WRITEW_LT:
5679			file_lock->fl_type = F_WRLCK;
5680		break;
5681		default:
5682			dprintk("NFSD: nfs4_lockt: bad lock type!\n");
5683			status = nfserr_inval;
5684		goto out;
5685	}
5686
5687	lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner,
5688				cstate->clp);
5689	if (lo)
5690		file_lock->fl_owner = (fl_owner_t)lo;
5691	file_lock->fl_pid = current->tgid;
5692	file_lock->fl_flags = FL_POSIX;
5693
5694	file_lock->fl_start = lockt->lt_offset;
5695	file_lock->fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length);
5696
5697	nfs4_transform_lock_offset(file_lock);
5698
5699	status = nfsd_test_lock(rqstp, &cstate->current_fh, file_lock);
5700	if (status)
5701		goto out;
5702
5703	if (file_lock->fl_type != F_UNLCK) {
5704		status = nfserr_denied;
5705		nfs4_set_lock_denied(file_lock, &lockt->lt_denied);
5706	}
5707out:
5708	if (lo)
5709		nfs4_put_stateowner(&lo->lo_owner);
5710	if (file_lock)
5711		locks_free_lock(file_lock);
5712	return status;
5713}
5714
5715__be32
5716nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5717	    struct nfsd4_locku *locku)
5718{
5719	struct nfs4_ol_stateid *stp;
5720	struct file *filp = NULL;
5721	struct file_lock *file_lock = NULL;
5722	__be32 status;
5723	int err;
5724	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
5725
5726	dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
5727		(long long) locku->lu_offset,
5728		(long long) locku->lu_length);
5729
5730	if (check_lock_length(locku->lu_offset, locku->lu_length))
5731		 return nfserr_inval;
5732
5733	status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
5734					&locku->lu_stateid, NFS4_LOCK_STID,
5735					&stp, nn);
5736	if (status)
5737		goto out;
5738	filp = find_any_file(stp->st_stid.sc_file);
5739	if (!filp) {
5740		status = nfserr_lock_range;
5741		goto put_stateid;
5742	}
5743	file_lock = locks_alloc_lock();
5744	if (!file_lock) {
5745		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
5746		status = nfserr_jukebox;
5747		goto fput;
5748	}
5749
5750	file_lock->fl_type = F_UNLCK;
5751	file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
5752	file_lock->fl_pid = current->tgid;
5753	file_lock->fl_file = filp;
5754	file_lock->fl_flags = FL_POSIX;
5755	file_lock->fl_lmops = &nfsd_posix_mng_ops;
5756	file_lock->fl_start = locku->lu_offset;
5757
5758	file_lock->fl_end = last_byte_offset(locku->lu_offset,
5759						locku->lu_length);
5760	nfs4_transform_lock_offset(file_lock);
5761
5762	err = vfs_lock_file(filp, F_SETLK, file_lock, NULL);
5763	if (err) {
5764		dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
5765		goto out_nfserr;
5766	}
5767	update_stateid(&stp->st_stid.sc_stateid);
5768	memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
5769fput:
5770	fput(filp);
5771put_stateid:
5772	up_write(&stp->st_rwsem);
5773	nfs4_put_stid(&stp->st_stid);
5774out:
5775	nfsd4_bump_seqid(cstate, status);
5776	if (file_lock)
5777		locks_free_lock(file_lock);
5778	return status;
5779
5780out_nfserr:
5781	status = nfserrno(err);
5782	goto fput;
5783}
5784
5785/*
5786 * returns
5787 * 	true:  locks held by lockowner
5788 * 	false: no locks held by lockowner
5789 */
5790static bool
5791check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
5792{
5793	struct file_lock *fl;
5794	int status = false;
5795	struct file *filp = find_any_file(fp);
5796	struct inode *inode;
5797	struct file_lock_context *flctx;
5798
5799	if (!filp) {
5800		/* Any valid lock stateid should have some sort of access */
5801		WARN_ON_ONCE(1);
5802		return status;
5803	}
5804
5805	inode = file_inode(filp);
5806	flctx = inode->i_flctx;
5807
5808	if (flctx && !list_empty_careful(&flctx->flc_posix)) {
5809		spin_lock(&flctx->flc_lock);
5810		list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
5811			if (fl->fl_owner == (fl_owner_t)lowner) {
5812				status = true;
5813				break;
5814			}
5815		}
5816		spin_unlock(&flctx->flc_lock);
5817	}
5818	fput(filp);
5819	return status;
5820}
5821
5822__be32
5823nfsd4_release_lockowner(struct svc_rqst *rqstp,
5824			struct nfsd4_compound_state *cstate,
5825			struct nfsd4_release_lockowner *rlockowner)
5826{
5827	clientid_t *clid = &rlockowner->rl_clientid;
5828	struct nfs4_stateowner *sop;
5829	struct nfs4_lockowner *lo = NULL;
5830	struct nfs4_ol_stateid *stp;
5831	struct xdr_netobj *owner = &rlockowner->rl_owner;
5832	unsigned int hashval = ownerstr_hashval(owner);
5833	__be32 status;
5834	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
5835	struct nfs4_client *clp;
5836
5837	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
5838		clid->cl_boot, clid->cl_id);
5839
5840	status = lookup_clientid(clid, cstate, nn);
5841	if (status)
5842		return status;
5843
5844	clp = cstate->clp;
5845	/* Find the matching lock stateowner */
5846	spin_lock(&clp->cl_lock);
5847	list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
5848			    so_strhash) {
5849
5850		if (sop->so_is_open_owner || !same_owner_str(sop, owner))
5851			continue;
5852
5853		/* see if there are still any locks associated with it */
5854		lo = lockowner(sop);
5855		list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) {
5856			if (check_for_locks(stp->st_stid.sc_file, lo)) {
5857				status = nfserr_locks_held;
5858				spin_unlock(&clp->cl_lock);
5859				return status;
5860			}
5861		}
5862
5863		nfs4_get_stateowner(sop);
5864		break;
5865	}
5866	spin_unlock(&clp->cl_lock);
5867	if (lo)
5868		release_lockowner(lo);
5869	return status;
5870}
5871
5872static inline struct nfs4_client_reclaim *
5873alloc_reclaim(void)
5874{
5875	return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
5876}
5877
5878bool
5879nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn)
5880{
5881	struct nfs4_client_reclaim *crp;
5882
5883	crp = nfsd4_find_reclaim_client(name, nn);
5884	return (crp && crp->cr_clp);
5885}
5886
5887/*
5888 * failure => all reset bets are off, nfserr_no_grace...
5889 */
5890struct nfs4_client_reclaim *
5891nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn)
5892{
5893	unsigned int strhashval;
5894	struct nfs4_client_reclaim *crp;
5895
5896	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
5897	crp = alloc_reclaim();
5898	if (crp) {
5899		strhashval = clientstr_hashval(name);
5900		INIT_LIST_HEAD(&crp->cr_strhash);
5901		list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
5902		memcpy(crp->cr_recdir, name, HEXDIR_LEN);
5903		crp->cr_clp = NULL;
5904		nn->reclaim_str_hashtbl_size++;
5905	}
5906	return crp;
5907}
5908
5909void
5910nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
5911{
5912	list_del(&crp->cr_strhash);
5913	kfree(crp);
5914	nn->reclaim_str_hashtbl_size--;
5915}
5916
5917void
5918nfs4_release_reclaim(struct nfsd_net *nn)
5919{
5920	struct nfs4_client_reclaim *crp = NULL;
5921	int i;
5922
5923	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
5924		while (!list_empty(&nn->reclaim_str_hashtbl[i])) {
5925			crp = list_entry(nn->reclaim_str_hashtbl[i].next,
5926			                struct nfs4_client_reclaim, cr_strhash);
5927			nfs4_remove_reclaim_record(crp, nn);
5928		}
5929	}
5930	WARN_ON_ONCE(nn->reclaim_str_hashtbl_size);
5931}
5932
5933/*
5934 * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
5935struct nfs4_client_reclaim *
5936nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn)
5937{
5938	unsigned int strhashval;
5939	struct nfs4_client_reclaim *crp = NULL;
5940
5941	dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir);
5942
5943	strhashval = clientstr_hashval(recdir);
5944	list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) {
5945		if (same_name(crp->cr_recdir, recdir)) {
5946			return crp;
5947		}
5948	}
5949	return NULL;
5950}
5951
5952/*
5953* Called from OPEN. Look for clientid in reclaim list.
5954*/
5955__be32
5956nfs4_check_open_reclaim(clientid_t *clid,
5957		struct nfsd4_compound_state *cstate,
5958		struct nfsd_net *nn)
5959{
5960	__be32 status;
5961
5962	/* find clientid in conf_id_hashtbl */
5963	status = lookup_clientid(clid, cstate, nn);
5964	if (status)
5965		return nfserr_reclaim_bad;
5966
5967	if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags))
5968		return nfserr_no_grace;
5969
5970	if (nfsd4_client_record_check(cstate->clp))
5971		return nfserr_reclaim_bad;
5972
5973	return nfs_ok;
5974}
5975
5976#ifdef CONFIG_NFSD_FAULT_INJECTION
5977static inline void
5978put_client(struct nfs4_client *clp)
5979{
5980	atomic_dec(&clp->cl_refcount);
5981}
5982
5983static struct nfs4_client *
5984nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size)
5985{
5986	struct nfs4_client *clp;
5987	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5988					  nfsd_net_id);
5989
5990	if (!nfsd_netns_ready(nn))
5991		return NULL;
5992
5993	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
5994		if (memcmp(&clp->cl_addr, addr, addr_size) == 0)
5995			return clp;
5996	}
5997	return NULL;
5998}
5999
6000u64
6001nfsd_inject_print_clients(void)
6002{
6003	struct nfs4_client *clp;
6004	u64 count = 0;
6005	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6006					  nfsd_net_id);
6007	char buf[INET6_ADDRSTRLEN];
6008
6009	if (!nfsd_netns_ready(nn))
6010		return 0;
6011
6012	spin_lock(&nn->client_lock);
6013	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
6014		rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
6015		pr_info("NFS Client: %s\n", buf);
6016		++count;
6017	}
6018	spin_unlock(&nn->client_lock);
6019
6020	return count;
6021}
6022
6023u64
6024nfsd_inject_forget_client(struct sockaddr_storage *addr, size_t addr_size)
6025{
6026	u64 count = 0;
6027	struct nfs4_client *clp;
6028	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6029					  nfsd_net_id);
6030
6031	if (!nfsd_netns_ready(nn))
6032		return count;
6033
6034	spin_lock(&nn->client_lock);
6035	clp = nfsd_find_client(addr, addr_size);
6036	if (clp) {
6037		if (mark_client_expired_locked(clp) == nfs_ok)
6038			++count;
6039		else
6040			clp = NULL;
6041	}
6042	spin_unlock(&nn->client_lock);
6043
6044	if (clp)
6045		expire_client(clp);
6046
6047	return count;
6048}
6049
6050u64
6051nfsd_inject_forget_clients(u64 max)
6052{
6053	u64 count = 0;
6054	struct nfs4_client *clp, *next;
6055	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6056						nfsd_net_id);
6057	LIST_HEAD(reaplist);
6058
6059	if (!nfsd_netns_ready(nn))
6060		return count;
6061
6062	spin_lock(&nn->client_lock);
6063	list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
6064		if (mark_client_expired_locked(clp) == nfs_ok) {
6065			list_add(&clp->cl_lru, &reaplist);
6066			if (max != 0 && ++count >= max)
6067				break;
6068		}
6069	}
6070	spin_unlock(&nn->client_lock);
6071
6072	list_for_each_entry_safe(clp, next, &reaplist, cl_lru)
6073		expire_client(clp);
6074
6075	return count;
6076}
6077
6078static void nfsd_print_count(struct nfs4_client *clp, unsigned int count,
6079			     const char *type)
6080{
6081	char buf[INET6_ADDRSTRLEN];
6082	rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
6083	printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type);
6084}
6085
6086static void
6087nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
6088			     struct list_head *collect)
6089{
6090	struct nfs4_client *clp = lst->st_stid.sc_client;
6091	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6092					  nfsd_net_id);
6093
6094	if (!collect)
6095		return;
6096
6097	lockdep_assert_held(&nn->client_lock);
6098	atomic_inc(&clp->cl_refcount);
6099	list_add(&lst->st_locks, collect);
6100}
6101
6102static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
6103				    struct list_head *collect,
6104				    bool (*func)(struct nfs4_ol_stateid *))
6105{
6106	struct nfs4_openowner *oop;
6107	struct nfs4_ol_stateid *stp, *st_next;
6108	struct nfs4_ol_stateid *lst, *lst_next;
6109	u64 count = 0;
6110
6111	spin_lock(&clp->cl_lock);
6112	list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) {
6113		list_for_each_entry_safe(stp, st_next,
6114				&oop->oo_owner.so_stateids, st_perstateowner) {
6115			list_for_each_entry_safe(lst, lst_next,
6116					&stp->st_locks, st_locks) {
6117				if (func) {
6118					if (func(lst))
6119						nfsd_inject_add_lock_to_list(lst,
6120									collect);
6121				}
6122				++count;
6123				/*
6124				 * Despite the fact that these functions deal
6125				 * with 64-bit integers for "count", we must
6126				 * ensure that it doesn't blow up the
6127				 * clp->cl_refcount. Throw a warning if we
6128				 * start to approach INT_MAX here.
6129				 */
6130				WARN_ON_ONCE(count == (INT_MAX / 2));
6131				if (count == max)
6132					goto out;
6133			}
6134		}
6135	}
6136out:
6137	spin_unlock(&clp->cl_lock);
6138
6139	return count;
6140}
6141
6142static u64
6143nfsd_collect_client_locks(struct nfs4_client *clp, struct list_head *collect,
6144			  u64 max)
6145{
6146	return nfsd_foreach_client_lock(clp, max, collect, unhash_lock_stateid);
6147}
6148
6149static u64
6150nfsd_print_client_locks(struct nfs4_client *clp)
6151{
6152	u64 count = nfsd_foreach_client_lock(clp, 0, NULL, NULL);
6153	nfsd_print_count(clp, count, "locked files");
6154	return count;
6155}
6156
6157u64
6158nfsd_inject_print_locks(void)
6159{
6160	struct nfs4_client *clp;
6161	u64 count = 0;
6162	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6163						nfsd_net_id);
6164
6165	if (!nfsd_netns_ready(nn))
6166		return 0;
6167
6168	spin_lock(&nn->client_lock);
6169	list_for_each_entry(clp, &nn->client_lru, cl_lru)
6170		count += nfsd_print_client_locks(clp);
6171	spin_unlock(&nn->client_lock);
6172
6173	return count;
6174}
6175
6176static void
6177nfsd_reap_locks(struct list_head *reaplist)
6178{
6179	struct nfs4_client *clp;
6180	struct nfs4_ol_stateid *stp, *next;
6181
6182	list_for_each_entry_safe(stp, next, reaplist, st_locks) {
6183		list_del_init(&stp->st_locks);
6184		clp = stp->st_stid.sc_client;
6185		nfs4_put_stid(&stp->st_stid);
6186		put_client(clp);
6187	}
6188}
6189
6190u64
6191nfsd_inject_forget_client_locks(struct sockaddr_storage *addr, size_t addr_size)
6192{
6193	unsigned int count = 0;
6194	struct nfs4_client *clp;
6195	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6196						nfsd_net_id);
6197	LIST_HEAD(reaplist);
6198
6199	if (!nfsd_netns_ready(nn))
6200		return count;
6201
6202	spin_lock(&nn->client_lock);
6203	clp = nfsd_find_client(addr, addr_size);
6204	if (clp)
6205		count = nfsd_collect_client_locks(clp, &reaplist, 0);
6206	spin_unlock(&nn->client_lock);
6207	nfsd_reap_locks(&reaplist);
6208	return count;
6209}
6210
6211u64
6212nfsd_inject_forget_locks(u64 max)
6213{
6214	u64 count = 0;
6215	struct nfs4_client *clp;
6216	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6217						nfsd_net_id);
6218	LIST_HEAD(reaplist);
6219
6220	if (!nfsd_netns_ready(nn))
6221		return count;
6222
6223	spin_lock(&nn->client_lock);
6224	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
6225		count += nfsd_collect_client_locks(clp, &reaplist, max - count);
6226		if (max != 0 && count >= max)
6227			break;
6228	}
6229	spin_unlock(&nn->client_lock);
6230	nfsd_reap_locks(&reaplist);
6231	return count;
6232}
6233
6234static u64
6235nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
6236			      struct list_head *collect,
6237			      void (*func)(struct nfs4_openowner *))
6238{
6239	struct nfs4_openowner *oop, *next;
6240	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6241						nfsd_net_id);
6242	u64 count = 0;
6243
6244	lockdep_assert_held(&nn->client_lock);
6245
6246	spin_lock(&clp->cl_lock);
6247	list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) {
6248		if (func) {
6249			func(oop);
6250			if (collect) {
6251				atomic_inc(&clp->cl_refcount);
6252				list_add(&oop->oo_perclient, collect);
6253			}
6254		}
6255		++count;
6256		/*
6257		 * Despite the fact that these functions deal with
6258		 * 64-bit integers for "count", we must ensure that
6259		 * it doesn't blow up the clp->cl_refcount. Throw a
6260		 * warning if we start to approach INT_MAX here.
6261		 */
6262		WARN_ON_ONCE(count == (INT_MAX / 2));
6263		if (count == max)
6264			break;
6265	}
6266	spin_unlock(&clp->cl_lock);
6267
6268	return count;
6269}
6270
6271static u64
6272nfsd_print_client_openowners(struct nfs4_client *clp)
6273{
6274	u64 count = nfsd_foreach_client_openowner(clp, 0, NULL, NULL);
6275
6276	nfsd_print_count(clp, count, "openowners");
6277	return count;
6278}
6279
6280static u64
6281nfsd_collect_client_openowners(struct nfs4_client *clp,
6282			       struct list_head *collect, u64 max)
6283{
6284	return nfsd_foreach_client_openowner(clp, max, collect,
6285						unhash_openowner_locked);
6286}
6287
6288u64
6289nfsd_inject_print_openowners(void)
6290{
6291	struct nfs4_client *clp;
6292	u64 count = 0;
6293	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6294						nfsd_net_id);
6295
6296	if (!nfsd_netns_ready(nn))
6297		return 0;
6298
6299	spin_lock(&nn->client_lock);
6300	list_for_each_entry(clp, &nn->client_lru, cl_lru)
6301		count += nfsd_print_client_openowners(clp);
6302	spin_unlock(&nn->client_lock);
6303
6304	return count;
6305}
6306
6307static void
6308nfsd_reap_openowners(struct list_head *reaplist)
6309{
6310	struct nfs4_client *clp;
6311	struct nfs4_openowner *oop, *next;
6312
6313	list_for_each_entry_safe(oop, next, reaplist, oo_perclient) {
6314		list_del_init(&oop->oo_perclient);
6315		clp = oop->oo_owner.so_client;
6316		release_openowner(oop);
6317		put_client(clp);
6318	}
6319}
6320
6321u64
6322nfsd_inject_forget_client_openowners(struct sockaddr_storage *addr,
6323				     size_t addr_size)
6324{
6325	unsigned int count = 0;
6326	struct nfs4_client *clp;
6327	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6328						nfsd_net_id);
6329	LIST_HEAD(reaplist);
6330
6331	if (!nfsd_netns_ready(nn))
6332		return count;
6333
6334	spin_lock(&nn->client_lock);
6335	clp = nfsd_find_client(addr, addr_size);
6336	if (clp)
6337		count = nfsd_collect_client_openowners(clp, &reaplist, 0);
6338	spin_unlock(&nn->client_lock);
6339	nfsd_reap_openowners(&reaplist);
6340	return count;
6341}
6342
6343u64
6344nfsd_inject_forget_openowners(u64 max)
6345{
6346	u64 count = 0;
6347	struct nfs4_client *clp;
6348	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6349						nfsd_net_id);
6350	LIST_HEAD(reaplist);
6351
6352	if (!nfsd_netns_ready(nn))
6353		return count;
6354
6355	spin_lock(&nn->client_lock);
6356	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
6357		count += nfsd_collect_client_openowners(clp, &reaplist,
6358							max - count);
6359		if (max != 0 && count >= max)
6360			break;
6361	}
6362	spin_unlock(&nn->client_lock);
6363	nfsd_reap_openowners(&reaplist);
6364	return count;
6365}
6366
6367static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
6368				     struct list_head *victims)
6369{
6370	struct nfs4_delegation *dp, *next;
6371	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6372						nfsd_net_id);
6373	u64 count = 0;
6374
6375	lockdep_assert_held(&nn->client_lock);
6376
6377	spin_lock(&state_lock);
6378	list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
6379		if (victims) {
6380			/*
6381			 * It's not safe to mess with delegations that have a
6382			 * non-zero dl_time. They might have already been broken
6383			 * and could be processed by the laundromat outside of
6384			 * the state_lock. Just leave them be.
6385			 */
6386			if (dp->dl_time != 0)
6387				continue;
6388
6389			atomic_inc(&clp->cl_refcount);
6390			WARN_ON(!unhash_delegation_locked(dp));
6391			list_add(&dp->dl_recall_lru, victims);
6392		}
6393		++count;
6394		/*
6395		 * Despite the fact that these functions deal with
6396		 * 64-bit integers for "count", we must ensure that
6397		 * it doesn't blow up the clp->cl_refcount. Throw a
6398		 * warning if we start to approach INT_MAX here.
6399		 */
6400		WARN_ON_ONCE(count == (INT_MAX / 2));
6401		if (count == max)
6402			break;
6403	}
6404	spin_unlock(&state_lock);
6405	return count;
6406}
6407
6408static u64
6409nfsd_print_client_delegations(struct nfs4_client *clp)
6410{
6411	u64 count = nfsd_find_all_delegations(clp, 0, NULL);
6412
6413	nfsd_print_count(clp, count, "delegations");
6414	return count;
6415}
6416
6417u64
6418nfsd_inject_print_delegations(void)
6419{
6420	struct nfs4_client *clp;
6421	u64 count = 0;
6422	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6423						nfsd_net_id);
6424
6425	if (!nfsd_netns_ready(nn))
6426		return 0;
6427
6428	spin_lock(&nn->client_lock);
6429	list_for_each_entry(clp, &nn->client_lru, cl_lru)
6430		count += nfsd_print_client_delegations(clp);
6431	spin_unlock(&nn->client_lock);
6432
6433	return count;
6434}
6435
6436static void
6437nfsd_forget_delegations(struct list_head *reaplist)
6438{
6439	struct nfs4_client *clp;
6440	struct nfs4_delegation *dp, *next;
6441
6442	list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
6443		list_del_init(&dp->dl_recall_lru);
6444		clp = dp->dl_stid.sc_client;
6445		revoke_delegation(dp);
6446		put_client(clp);
6447	}
6448}
6449
6450u64
6451nfsd_inject_forget_client_delegations(struct sockaddr_storage *addr,
6452				      size_t addr_size)
6453{
6454	u64 count = 0;
6455	struct nfs4_client *clp;
6456	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6457						nfsd_net_id);
6458	LIST_HEAD(reaplist);
6459
6460	if (!nfsd_netns_ready(nn))
6461		return count;
6462
6463	spin_lock(&nn->client_lock);
6464	clp = nfsd_find_client(addr, addr_size);
6465	if (clp)
6466		count = nfsd_find_all_delegations(clp, 0, &reaplist);
6467	spin_unlock(&nn->client_lock);
6468
6469	nfsd_forget_delegations(&reaplist);
6470	return count;
6471}
6472
6473u64
6474nfsd_inject_forget_delegations(u64 max)
6475{
6476	u64 count = 0;
6477	struct nfs4_client *clp;
6478	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6479						nfsd_net_id);
6480	LIST_HEAD(reaplist);
6481
6482	if (!nfsd_netns_ready(nn))
6483		return count;
6484
6485	spin_lock(&nn->client_lock);
6486	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
6487		count += nfsd_find_all_delegations(clp, max - count, &reaplist);
6488		if (max != 0 && count >= max)
6489			break;
6490	}
6491	spin_unlock(&nn->client_lock);
6492	nfsd_forget_delegations(&reaplist);
6493	return count;
6494}
6495
6496static void
6497nfsd_recall_delegations(struct list_head *reaplist)
6498{
6499	struct nfs4_client *clp;
6500	struct nfs4_delegation *dp, *next;
6501
6502	list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
6503		list_del_init(&dp->dl_recall_lru);
6504		clp = dp->dl_stid.sc_client;
6505		/*
6506		 * We skipped all entries that had a zero dl_time before,
6507		 * so we can now reset the dl_time back to 0. If a delegation
6508		 * break comes in now, then it won't make any difference since
6509		 * we're recalling it either way.
6510		 */
6511		spin_lock(&state_lock);
6512		dp->dl_time = 0;
6513		spin_unlock(&state_lock);
6514		nfsd_break_one_deleg(dp);
6515		put_client(clp);
6516	}
6517}
6518
6519u64
6520nfsd_inject_recall_client_delegations(struct sockaddr_storage *addr,
6521				      size_t addr_size)
6522{
6523	u64 count = 0;
6524	struct nfs4_client *clp;
6525	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6526						nfsd_net_id);
6527	LIST_HEAD(reaplist);
6528
6529	if (!nfsd_netns_ready(nn))
6530		return count;
6531
6532	spin_lock(&nn->client_lock);
6533	clp = nfsd_find_client(addr, addr_size);
6534	if (clp)
6535		count = nfsd_find_all_delegations(clp, 0, &reaplist);
6536	spin_unlock(&nn->client_lock);
6537
6538	nfsd_recall_delegations(&reaplist);
6539	return count;
6540}
6541
6542u64
6543nfsd_inject_recall_delegations(u64 max)
6544{
6545	u64 count = 0;
6546	struct nfs4_client *clp, *next;
6547	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6548						nfsd_net_id);
6549	LIST_HEAD(reaplist);
6550
6551	if (!nfsd_netns_ready(nn))
6552		return count;
6553
6554	spin_lock(&nn->client_lock);
6555	list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
6556		count += nfsd_find_all_delegations(clp, max - count, &reaplist);
6557		if (max != 0 && ++count >= max)
6558			break;
6559	}
6560	spin_unlock(&nn->client_lock);
6561	nfsd_recall_delegations(&reaplist);
6562	return count;
6563}
6564#endif /* CONFIG_NFSD_FAULT_INJECTION */
6565
6566/*
6567 * Since the lifetime of a delegation isn't limited to that of an open, a
6568 * client may quite reasonably hang on to a delegation as long as it has
6569 * the inode cached.  This becomes an obvious problem the first time a
6570 * client's inode cache approaches the size of the server's total memory.
6571 *
6572 * For now we avoid this problem by imposing a hard limit on the number
6573 * of delegations, which varies according to the server's memory size.
6574 */
6575static void
6576set_max_delegations(void)
6577{
6578	/*
6579	 * Allow at most 4 delegations per megabyte of RAM.  Quick
6580	 * estimates suggest that in the worst case (where every delegation
6581	 * is for a different inode), a delegation could take about 1.5K,
6582	 * giving a worst case usage of about 6% of memory.
6583	 */
6584	max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT);
6585}
6586
6587static int nfs4_state_create_net(struct net *net)
6588{
6589	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
6590	int i;
6591
6592	nn->conf_id_hashtbl = kmalloc(sizeof(struct list_head) *
6593			CLIENT_HASH_SIZE, GFP_KERNEL);
6594	if (!nn->conf_id_hashtbl)
6595		goto err;
6596	nn->unconf_id_hashtbl = kmalloc(sizeof(struct list_head) *
6597			CLIENT_HASH_SIZE, GFP_KERNEL);
6598	if (!nn->unconf_id_hashtbl)
6599		goto err_unconf_id;
6600	nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) *
6601			SESSION_HASH_SIZE, GFP_KERNEL);
6602	if (!nn->sessionid_hashtbl)
6603		goto err_sessionid;
6604
6605	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
6606		INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
6607		INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]);
6608	}
6609	for (i = 0; i < SESSION_HASH_SIZE; i++)
6610		INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
6611	nn->conf_name_tree = RB_ROOT;
6612	nn->unconf_name_tree = RB_ROOT;
6613	INIT_LIST_HEAD(&nn->client_lru);
6614	INIT_LIST_HEAD(&nn->close_lru);
6615	INIT_LIST_HEAD(&nn->del_recall_lru);
6616	spin_lock_init(&nn->client_lock);
6617
6618	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
6619	get_net(net);
6620
6621	return 0;
6622
6623err_sessionid:
6624	kfree(nn->unconf_id_hashtbl);
6625err_unconf_id:
6626	kfree(nn->conf_id_hashtbl);
6627err:
6628	return -ENOMEM;
6629}
6630
6631static void
6632nfs4_state_destroy_net(struct net *net)
6633{
6634	int i;
6635	struct nfs4_client *clp = NULL;
6636	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
6637
6638	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
6639		while (!list_empty(&nn->conf_id_hashtbl[i])) {
6640			clp = list_entry(nn->conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
6641			destroy_client(clp);
6642		}
6643	}
6644
6645	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
6646		while (!list_empty(&nn->unconf_id_hashtbl[i])) {
6647			clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
6648			destroy_client(clp);
6649		}
6650	}
6651
6652	kfree(nn->sessionid_hashtbl);
6653	kfree(nn->unconf_id_hashtbl);
6654	kfree(nn->conf_id_hashtbl);
6655	put_net(net);
6656}
6657
6658int
6659nfs4_state_start_net(struct net *net)
6660{
6661	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
6662	int ret;
6663
6664	ret = nfs4_state_create_net(net);
6665	if (ret)
6666		return ret;
6667	nn->boot_time = get_seconds();
6668	nn->grace_ended = false;
6669	locks_start_grace(net, &nn->nfsd4_manager);
6670	nfsd4_client_tracking_init(net);
6671	printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
6672	       nn->nfsd4_grace, net);
6673	queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
6674	return 0;
6675}
6676
6677/* initialization to perform when the nfsd service is started: */
6678
6679int
6680nfs4_state_start(void)
6681{
6682	int ret;
6683
6684	ret = set_callback_cred();
6685	if (ret)
6686		return -ENOMEM;
6687	laundry_wq = create_singlethread_workqueue("nfsd4");
6688	if (laundry_wq == NULL) {
6689		ret = -ENOMEM;
6690		goto out_recovery;
6691	}
6692	ret = nfsd4_create_callback_queue();
6693	if (ret)
6694		goto out_free_laundry;
6695
6696	set_max_delegations();
6697
6698	return 0;
6699
6700out_free_laundry:
6701	destroy_workqueue(laundry_wq);
6702out_recovery:
6703	return ret;
6704}
6705
6706void
6707nfs4_state_shutdown_net(struct net *net)
6708{
6709	struct nfs4_delegation *dp = NULL;
6710	struct list_head *pos, *next, reaplist;
6711	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
6712
6713	cancel_delayed_work_sync(&nn->laundromat_work);
6714	locks_end_grace(&nn->nfsd4_manager);
6715
6716	INIT_LIST_HEAD(&reaplist);
6717	spin_lock(&state_lock);
6718	list_for_each_safe(pos, next, &nn->del_recall_lru) {
6719		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
6720		WARN_ON(!unhash_delegation_locked(dp));
6721		list_add(&dp->dl_recall_lru, &reaplist);
6722	}
6723	spin_unlock(&state_lock);
6724	list_for_each_safe(pos, next, &reaplist) {
6725		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
6726		list_del_init(&dp->dl_recall_lru);
6727		put_clnt_odstate(dp->dl_clnt_odstate);
6728		nfs4_put_deleg_lease(dp->dl_stid.sc_file);
6729		nfs4_put_stid(&dp->dl_stid);
6730	}
6731
6732	nfsd4_client_tracking_exit(net);
6733	nfs4_state_destroy_net(net);
6734}
6735
6736void
6737nfs4_state_shutdown(void)
6738{
6739	destroy_workqueue(laundry_wq);
6740	nfsd4_destroy_callback_queue();
6741}
6742
6743static void
6744get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
6745{
6746	if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid))
6747		memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t));
6748}
6749
6750static void
6751put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
6752{
6753	if (cstate->minorversion) {
6754		memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t));
6755		SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
6756	}
6757}
6758
6759void
6760clear_current_stateid(struct nfsd4_compound_state *cstate)
6761{
6762	CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
6763}
6764
6765/*
6766 * functions to set current state id
6767 */
6768void
6769nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
6770{
6771	put_stateid(cstate, &odp->od_stateid);
6772}
6773
6774void
6775nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
6776{
6777	put_stateid(cstate, &open->op_stateid);
6778}
6779
6780void
6781nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
6782{
6783	put_stateid(cstate, &close->cl_stateid);
6784}
6785
6786void
6787nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock)
6788{
6789	put_stateid(cstate, &lock->lk_resp_stateid);
6790}
6791
6792/*
6793 * functions to consume current state id
6794 */
6795
6796void
6797nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
6798{
6799	get_stateid(cstate, &odp->od_stateid);
6800}
6801
6802void
6803nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *drp)
6804{
6805	get_stateid(cstate, &drp->dr_stateid);
6806}
6807
6808void
6809nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, struct nfsd4_free_stateid *fsp)
6810{
6811	get_stateid(cstate, &fsp->fr_stateid);
6812}
6813
6814void
6815nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr)
6816{
6817	get_stateid(cstate, &setattr->sa_stateid);
6818}
6819
6820void
6821nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
6822{
6823	get_stateid(cstate, &close->cl_stateid);
6824}
6825
6826void
6827nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku)
6828{
6829	get_stateid(cstate, &locku->lu_stateid);
6830}
6831
6832void
6833nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, struct nfsd4_read *read)
6834{
6835	get_stateid(cstate, &read->rd_stateid);
6836}
6837
6838void
6839nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, struct nfsd4_write *write)
6840{
6841	get_stateid(cstate, &write->wr_stateid);
6842}
6843