1/*
2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12#include <linux/sched.h>
13#include <linux/slab.h>
14#include <linux/spinlock.h>
15#include <linux/completion.h>
16#include <linux/buffer_head.h>
17#include <linux/kallsyms.h>
18#include <linux/gfs2_ondisk.h>
19
20#include "gfs2.h"
21#include "incore.h"
22#include "glock.h"
23#include "inode.h"
24#include "log.h"
25#include "lops.h"
26#include "meta_io.h"
27#include "trans.h"
28#include "util.h"
29#include "trace_gfs2.h"
30
31int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
32		     unsigned int revokes)
33{
34	struct gfs2_trans *tr;
35	int error;
36
37	BUG_ON(current->journal_info);
38	BUG_ON(blocks == 0 && revokes == 0);
39
40	if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
41		return -EROFS;
42
43	tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
44	if (!tr)
45		return -ENOMEM;
46
47	tr->tr_ip = _RET_IP_;
48	tr->tr_blocks = blocks;
49	tr->tr_revokes = revokes;
50	tr->tr_reserved = 1;
51	tr->tr_alloced = 1;
52	if (blocks)
53		tr->tr_reserved += 6 + blocks;
54	if (revokes)
55		tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
56						   sizeof(u64));
57	INIT_LIST_HEAD(&tr->tr_databuf);
58	INIT_LIST_HEAD(&tr->tr_buf);
59
60	sb_start_intwrite(sdp->sd_vfs);
61
62	error = gfs2_log_reserve(sdp, tr->tr_reserved);
63	if (error)
64		goto fail;
65
66	current->journal_info = tr;
67
68	return 0;
69
70fail:
71	sb_end_intwrite(sdp->sd_vfs);
72	kfree(tr);
73
74	return error;
75}
76
77static void gfs2_print_trans(const struct gfs2_trans *tr)
78{
79	pr_warn("Transaction created at: %pSR\n", (void *)tr->tr_ip);
80	pr_warn("blocks=%u revokes=%u reserved=%u touched=%u\n",
81		tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched);
82	pr_warn("Buf %u/%u Databuf %u/%u Revoke %u/%u\n",
83		tr->tr_num_buf_new, tr->tr_num_buf_rm,
84		tr->tr_num_databuf_new, tr->tr_num_databuf_rm,
85		tr->tr_num_revoke, tr->tr_num_revoke_rm);
86}
87
88void gfs2_trans_end(struct gfs2_sbd *sdp)
89{
90	struct gfs2_trans *tr = current->journal_info;
91	s64 nbuf;
92	int alloced = tr->tr_alloced;
93
94	BUG_ON(!tr);
95	current->journal_info = NULL;
96
97	if (!tr->tr_touched) {
98		gfs2_log_release(sdp, tr->tr_reserved);
99		if (alloced) {
100			kfree(tr);
101			sb_end_intwrite(sdp->sd_vfs);
102		}
103		return;
104	}
105
106	nbuf = tr->tr_num_buf_new + tr->tr_num_databuf_new;
107	nbuf -= tr->tr_num_buf_rm;
108	nbuf -= tr->tr_num_databuf_rm;
109
110	if (gfs2_assert_withdraw(sdp, (nbuf <= tr->tr_blocks) &&
111				       (tr->tr_num_revoke <= tr->tr_revokes)))
112		gfs2_print_trans(tr);
113
114	gfs2_log_commit(sdp, tr);
115	if (alloced && !tr->tr_attached)
116			kfree(tr);
117	up_read(&sdp->sd_log_flush_lock);
118
119	if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
120		gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
121	if (alloced)
122		sb_end_intwrite(sdp->sd_vfs);
123}
124
125static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
126					       struct buffer_head *bh,
127					       const struct gfs2_log_operations *lops)
128{
129	struct gfs2_bufdata *bd;
130
131	bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
132	bd->bd_bh = bh;
133	bd->bd_gl = gl;
134	bd->bd_ops = lops;
135	INIT_LIST_HEAD(&bd->bd_list);
136	bh->b_private = bd;
137	return bd;
138}
139
140/**
141 * gfs2_trans_add_data - Add a databuf to the transaction.
142 * @gl: The inode glock associated with the buffer
143 * @bh: The buffer to add
144 *
145 * This is used in two distinct cases:
146 * i) In ordered write mode
147 *    We put the data buffer on a list so that we can ensure that its
148 *    synced to disk at the right time
149 * ii) In journaled data mode
150 *    We need to journal the data block in the same way as metadata in
151 *    the functions above. The difference is that here we have a tag
152 *    which is two __be64's being the block number (as per meta data)
153 *    and a flag which says whether the data block needs escaping or
154 *    not. This means we need a new log entry for each 251 or so data
155 *    blocks, which isn't an enormous overhead but twice as much as
156 *    for normal metadata blocks.
157 */
158void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
159{
160	struct gfs2_trans *tr = current->journal_info;
161	struct gfs2_sbd *sdp = gl->gl_sbd;
162	struct address_space *mapping = bh->b_page->mapping;
163	struct gfs2_inode *ip = GFS2_I(mapping->host);
164	struct gfs2_bufdata *bd;
165
166	if (!gfs2_is_jdata(ip)) {
167		gfs2_ordered_add_inode(ip);
168		return;
169	}
170
171	lock_buffer(bh);
172	gfs2_log_lock(sdp);
173	bd = bh->b_private;
174	if (bd == NULL) {
175		gfs2_log_unlock(sdp);
176		unlock_buffer(bh);
177		if (bh->b_private == NULL)
178			bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops);
179		lock_buffer(bh);
180		gfs2_log_lock(sdp);
181	}
182	gfs2_assert(sdp, bd->bd_gl == gl);
183	tr->tr_touched = 1;
184	if (list_empty(&bd->bd_list)) {
185		set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
186		set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
187		gfs2_pin(sdp, bd->bd_bh);
188		tr->tr_num_databuf_new++;
189		list_add_tail(&bd->bd_list, &tr->tr_databuf);
190	}
191	gfs2_log_unlock(sdp);
192	unlock_buffer(bh);
193}
194
195static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
196{
197	struct gfs2_meta_header *mh;
198	struct gfs2_trans *tr;
199	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
200
201	tr = current->journal_info;
202	tr->tr_touched = 1;
203	if (!list_empty(&bd->bd_list))
204		return;
205	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
206	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
207	mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
208	if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
209		pr_err("Attempting to add uninitialised block to journal (inplace block=%lld)\n",
210		       (unsigned long long)bd->bd_bh->b_blocknr);
211		BUG();
212	}
213	if (unlikely(state == SFS_FROZEN)) {
214		printk(KERN_INFO "GFS2:adding buf while frozen\n");
215		gfs2_assert_withdraw(sdp, 0);
216	}
217	gfs2_pin(sdp, bd->bd_bh);
218	mh->__pad0 = cpu_to_be64(0);
219	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
220	list_add(&bd->bd_list, &tr->tr_buf);
221	tr->tr_num_buf_new++;
222}
223
224void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
225{
226
227	struct gfs2_sbd *sdp = gl->gl_sbd;
228	struct gfs2_bufdata *bd;
229
230	lock_buffer(bh);
231	gfs2_log_lock(sdp);
232	bd = bh->b_private;
233	if (bd == NULL) {
234		gfs2_log_unlock(sdp);
235		unlock_buffer(bh);
236		lock_page(bh->b_page);
237		if (bh->b_private == NULL)
238			bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops);
239		unlock_page(bh->b_page);
240		lock_buffer(bh);
241		gfs2_log_lock(sdp);
242	}
243	gfs2_assert(sdp, bd->bd_gl == gl);
244	meta_lo_add(sdp, bd);
245	gfs2_log_unlock(sdp);
246	unlock_buffer(bh);
247}
248
249void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
250{
251	struct gfs2_trans *tr = current->journal_info;
252
253	BUG_ON(!list_empty(&bd->bd_list));
254	gfs2_add_revoke(sdp, bd);
255	tr->tr_touched = 1;
256	tr->tr_num_revoke++;
257}
258
259void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
260{
261	struct gfs2_bufdata *bd, *tmp;
262	struct gfs2_trans *tr = current->journal_info;
263	unsigned int n = len;
264
265	gfs2_log_lock(sdp);
266	list_for_each_entry_safe(bd, tmp, &sdp->sd_log_le_revoke, bd_list) {
267		if ((bd->bd_blkno >= blkno) && (bd->bd_blkno < (blkno + len))) {
268			list_del_init(&bd->bd_list);
269			gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
270			sdp->sd_log_num_revoke--;
271			kmem_cache_free(gfs2_bufdata_cachep, bd);
272			tr->tr_num_revoke_rm++;
273			if (--n == 0)
274				break;
275		}
276	}
277	gfs2_log_unlock(sdp);
278}
279
280