/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);

/*
 * if we currently have a spinning reader or writer lock
 * (indicated by the rw flag) this will bump the count
 * of blocking holders and drop the spinlock.
 */
void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
	/*
	 * no lock is required.  The lock owner may change if
	 * we have a read lock, but it won't change to or away
	 * from us.  If we have the write lock, we are the owner
	 * and it'll never change.
	 */
	if (eb->lock_nested && current->pid == eb->lock_owner)
		return;
	if (rw == BTRFS_WRITE_LOCK) {
		if (atomic_read(&eb->blocking_writers) == 0) {
			WARN_ON(atomic_read(&eb->spinning_writers) != 1);
			atomic_dec(&eb->spinning_writers);
			btrfs_assert_tree_locked(eb);
			atomic_inc(&eb->blocking_writers);
			write_unlock(&eb->lock);
		}
	} else if (rw == BTRFS_READ_LOCK) {
		btrfs_assert_tree_read_locked(eb);
		atomic_inc(&eb->blocking_readers);
		WARN_ON(atomic_read(&eb->spinning_readers) == 0);
		atomic_dec(&eb->spinning_readers);
		read_unlock(&eb->lock);
	}
	return;
}
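
/*
 * Illustrative sketch (not a call path in this file): a reader that is
 * about to do something that may sleep first converts its spinning read
 * lock to a blocking one, assuming eb is a valid, read-locked extent
 * buffer:
 *
 *	btrfs_tree_read_lock(eb);
 *	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 *	... code that may sleep or schedule ...
 *	btrfs_clear_lock_blocking_rw(eb, BTRFS_READ_LOCK_BLOCKING);
 *	btrfs_tree_read_unlock(eb);
 */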

/*
 * if we currently have a blocking lock, take the spinlock
 * and drop our blocking count
 */
void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
	/*
	 * no lock is required.  The lock owner may change if
	 * we have a read lock, but it won't change to or away
	 * from us.  If we have the write lock, we are the owner
	 * and it'll never change.
	 */
	if (eb->lock_nested && current->pid == eb->lock_owner)
		return;

	if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
		BUG_ON(atomic_read(&eb->blocking_writers) != 1);
		write_lock(&eb->lock);
		WARN_ON(atomic_read(&eb->spinning_writers));
		atomic_inc(&eb->spinning_writers);
		/*
		 * atomic_dec_and_test implies a barrier for waitqueue_active
		 */
		if (atomic_dec_and_test(&eb->blocking_writers) &&
		    waitqueue_active(&eb->write_lock_wq))
			wake_up(&eb->write_lock_wq);
	} else if (rw == BTRFS_READ_LOCK_BLOCKING) {
		BUG_ON(atomic_read(&eb->blocking_readers) == 0);
		read_lock(&eb->lock);
		atomic_inc(&eb->spinning_readers);
		/*
		 * atomic_dec_and_test implies a barrier for waitqueue_active
		 */
		if (atomic_dec_and_test(&eb->blocking_readers) &&
		    waitqueue_active(&eb->read_lock_wq))
			wake_up(&eb->read_lock_wq);
	}
	return;
}
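
/*
 * Illustrative sketch of the matching write-side conversion (assumes eb
 * is a valid extent buffer; not a call path in this file):
 *
 *	btrfs_tree_lock(eb);
 *	btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
 *	... work that may sleep while other lockers make progress ...
 *	btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
 *	btrfs_tree_unlock(eb);
 */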

/*
 * take a spinning read lock.  This will wait for any blocking
 * writers
 */
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
again:
	BUG_ON(!atomic_read(&eb->blocking_writers) &&
	       current->pid == eb->lock_owner);

	read_lock(&eb->lock);
	if (atomic_read(&eb->blocking_writers) &&
	    current->pid == eb->lock_owner) {
		/*
		 * This extent is already write-locked by our thread. We allow
		 * an additional read lock to be added because it's for the same
		 * thread. btrfs_find_all_roots() depends on this as it may be
		 * called on a partly (write-)locked tree.
		 */
		BUG_ON(eb->lock_nested);
		eb->lock_nested = 1;
		read_unlock(&eb->lock);
		return;
	}
	if (atomic_read(&eb->blocking_writers)) {
		read_unlock(&eb->lock);
		wait_event(eb->write_lock_wq,
			   atomic_read(&eb->blocking_writers) == 0);
		goto again;
	}
	atomic_inc(&eb->read_locks);
	atomic_inc(&eb->spinning_readers);
}
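
/*
 * Illustrative sketch of the nested case handled above (assumes eb is a
 * valid extent buffer already write locked by this thread, as in the
 * btrfs_find_all_roots() path mentioned in the comment):
 *
 *	btrfs_tree_lock(eb);
 *	btrfs_tree_read_lock(eb);	sets eb->lock_nested, takes no lock
 *	btrfs_tree_read_unlock(eb);	only clears eb->lock_nested
 *	btrfs_tree_unlock(eb);
 */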

/*
 * take a spinning read lock.
 * returns 1 if we get the read lock and 0 if we don't
 * this won't wait for blocking writers
 */
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
{
	if (atomic_read(&eb->blocking_writers))
		return 0;

	read_lock(&eb->lock);
	if (atomic_read(&eb->blocking_writers)) {
		read_unlock(&eb->lock);
		return 0;
	}
	atomic_inc(&eb->read_locks);
	atomic_inc(&eb->spinning_readers);
	return 1;
}

/*
 * returns 1 if we get the read lock and 0 if we don't
 * this won't wait for blocking writers
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
	if (atomic_read(&eb->blocking_writers))
		return 0;

	if (!read_trylock(&eb->lock))
		return 0;

	if (atomic_read(&eb->blocking_writers)) {
		read_unlock(&eb->lock);
		return 0;
	}
	atomic_inc(&eb->read_locks);
	atomic_inc(&eb->spinning_readers);
	return 1;
}
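
/*
 * Illustrative sketch (hypothetical caller): try the non-blocking read
 * lock first and fall back to the variant that waits for blocking
 * writers if it fails:
 *
 *	if (!btrfs_try_tree_read_lock(eb))
 *		btrfs_tree_read_lock(eb);	may wait for blocking writers
 */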

/*
 * returns 1 if we get the write lock and 0 if we don't
 * this won't wait for blocking writers or readers
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
	if (atomic_read(&eb->blocking_writers) ||
	    atomic_read(&eb->blocking_readers))
		return 0;

	write_lock(&eb->lock);
	if (atomic_read(&eb->blocking_writers) ||
	    atomic_read(&eb->blocking_readers)) {
		write_unlock(&eb->lock);
		return 0;
	}
	atomic_inc(&eb->write_locks);
	atomic_inc(&eb->spinning_writers);
	eb->lock_owner = current->pid;
	return 1;
}
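
/*
 * Illustrative sketch (hypothetical caller): opportunistically take the
 * write lock without sleeping, falling back to btrfs_tree_lock(), which
 * waits for blocking readers and writers:
 *
 *	if (!btrfs_try_tree_write_lock(eb))
 *		btrfs_tree_lock(eb);
 */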

/*
 * drop a spinning read lock
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
	/*
	 * if we're nested, we have the write lock.  No new locking
	 * is needed as long as we are the lock owner.
	 * The write unlock will do a barrier for us, and the lock_nested
	 * field only matters to the lock owner.
	 */
	if (eb->lock_nested && current->pid == eb->lock_owner) {
		eb->lock_nested = 0;
		return;
	}
	btrfs_assert_tree_read_locked(eb);
	WARN_ON(atomic_read(&eb->spinning_readers) == 0);
	atomic_dec(&eb->spinning_readers);
	atomic_dec(&eb->read_locks);
	read_unlock(&eb->lock);
}

/*
 * drop a blocking read lock
 */
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
	/*
	 * if we're nested, we have the write lock.  No new locking
	 * is needed as long as we are the lock owner.
	 * The write unlock will do a barrier for us, and the lock_nested
	 * field only matters to the lock owner.
	 */
	if (eb->lock_nested && current->pid == eb->lock_owner) {
		eb->lock_nested = 0;
		return;
	}
	btrfs_assert_tree_read_locked(eb);
	WARN_ON(atomic_read(&eb->blocking_readers) == 0);
	/*
	 * atomic_dec_and_test implies a barrier for waitqueue_active
	 */
	if (atomic_dec_and_test(&eb->blocking_readers) &&
	    waitqueue_active(&eb->read_lock_wq))
		wake_up(&eb->read_lock_wq);
	atomic_dec(&eb->read_locks);
}
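
/*
 * Illustrative sketch (assumes eb is a valid extent buffer): a blocking
 * read lock is dropped with the _blocking variant, a spinning one with
 * btrfs_tree_read_unlock():
 *
 *	btrfs_tree_read_lock(eb);
 *	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 *	...
 *	btrfs_tree_read_unlock_blocking(eb);
 */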

/*
 * take a spinning write lock.  This will wait for both
 * blocking readers and writers
 */
void btrfs_tree_lock(struct extent_buffer *eb)
{
	WARN_ON(eb->lock_owner == current->pid);
again:
	wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
	wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
	write_lock(&eb->lock);
	if (atomic_read(&eb->blocking_readers)) {
		write_unlock(&eb->lock);
		wait_event(eb->read_lock_wq,
			   atomic_read(&eb->blocking_readers) == 0);
		goto again;
	}
	if (atomic_read(&eb->blocking_writers)) {
		write_unlock(&eb->lock);
		wait_event(eb->write_lock_wq,
			   atomic_read(&eb->blocking_writers) == 0);
		goto again;
	}
	WARN_ON(atomic_read(&eb->spinning_writers));
	atomic_inc(&eb->spinning_writers);
	atomic_inc(&eb->write_locks);
	eb->lock_owner = current->pid;
}

/*
 * drop a spinning or a blocking write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
	int blockers = atomic_read(&eb->blocking_writers);

	BUG_ON(blockers > 1);

	btrfs_assert_tree_locked(eb);
	eb->lock_owner = 0;
	atomic_dec(&eb->write_locks);

	if (blockers) {
		WARN_ON(atomic_read(&eb->spinning_writers));
		atomic_dec(&eb->blocking_writers);
		/*
		 * Make sure counter is updated before we wake up waiters.
		 */
		smp_mb();
		if (waitqueue_active(&eb->write_lock_wq))
			wake_up(&eb->write_lock_wq);
	} else {
		WARN_ON(atomic_read(&eb->spinning_writers) != 1);
		atomic_dec(&eb->spinning_writers);
		write_unlock(&eb->lock);
	}
}
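
/*
 * Illustrative sketch (assumes eb is a valid extent buffer): unlike the
 * read side, btrfs_tree_unlock() handles both the spinning and the
 * blocking case, so a writer that went blocking does not need to clear
 * the blocking state first:
 *
 *	btrfs_tree_lock(eb);
 *	btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
 *	...
 *	btrfs_tree_unlock(eb);
 */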

void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
	BUG_ON(!atomic_read(&eb->write_locks));
}

static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
{
	BUG_ON(!atomic_read(&eb->read_locks));
}