/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 *	jfs_logmgr.c: log manager
 *
 * for related information, see transaction manager (jfs_txnmgr.c), and
 * recovery manager (jfs_logredo.c).
 *
 * note: for detail, RTFS.
 *
 *	log buffer manager:
 * special purpose buffer manager supporting log i/o requirements.
 * per log serial pageout of logpage
 * queuing i/o requests and redriving i/o at iodone
 * maintain current logpage buffer
 * no caching since append only
 * appropriate jfs buffer cache buffers as needed
 *
 *	group commit:
 * transactions which wrote COMMIT records in the same in-memory
 * log page during the pageout of previous/current log page(s) are
 * committed together by the pageout of the page.
 *
 *	TBD lazy commit:
 * transactions are committed asynchronously when the log page
 * containing its COMMIT record is paged out when it becomes full;
 *
 *	serialization:
 * . a per log lock serializes log write.
 * . a per log lock serializes group commit.
 * . a per log lock serializes log open/close;
 *
 *	TBD log integrity:
 * careful-write (ping-pong) of last logpage to recover from crash
 * in overwrite.
 * detection of split (out-of-order) write of physical sectors
 * of last logpage via timestamp at end of each sector
 * with its mirror data array at trailer.
 *
 *	alternatives:
 * lsn - 64-bit monotonically increasing integer vs
 * 32-bit lspn and page eor.
 */

#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/buffer_head.h>		/* for sync_blockdev() */
#include <linux/bio.h>
#include <linux/freezer.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_superblock.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"


/*
 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
 */
static struct lbuf *log_redrive_list;
static DEFINE_SPINLOCK(log_redrive_lock);


/*
 *	log read/write serialization (per log)
 */
#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))


/*
 *	log group commit serialization (per log)
 */

#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)

/*
 *	log sync serialization (per log)
 */
#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
/*
#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
*/
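
/*
 * Worked example (illustrative, not from the original source; assumes
 * LOGPSIZE = 4096): for a 32 MB log, LOGSYNC_DELTA = min(32 MB / 8,
 * 128 * 4 KB) = 512 KB, so the sync point is forwarded roughly every
 * 512 KB of log written, while LOGSYNC_BARRIER = 8 MB is the backlog
 * at which lmLogSync() raises log_SYNCBARRIER to throttle new
 * transactions.
 */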


/*
 *	log buffer cache synchronization
 */
static DEFINE_SPINLOCK(jfsLCacheLock);

#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)

/*
 * See __SLEEP_COND in jfs_lock.h
 */
#define LCACHE_SLEEP_COND(wq, cond, flags)	\
do {						\
	if (cond)				\
		break;				\
	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
} while (0)
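
/*
 * Usage sketch (informational): callers invoke LCACHE_SLEEP_COND() with
 * jfsLCacheLock already held; if the condition is false, __SLEEP_COND()
 * drops the lock around the sleep using the LCACHE_UNLOCK()/LCACHE_LOCK()
 * expressions passed in, so the condition is always re-tested with the
 * lock held (see lbmAllocate() and lbmIOWait() below).
 */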

#define	LCACHE_WAKEUP(event)	wake_up(event)


/*
 *	lbuf buffer cache (lCache) control
 */
/* log buffer manager pageout control (cumulative, inclusive) */
#define	lbmREAD		0x0001
#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
				 * init pageout if at head of queue;
				 */
#define	lbmRELEASE	0x0004	/* remove from write queue
				 * at completion of pageout;
				 * do not free/recycle it yet:
				 * caller will free it;
				 */
#define	lbmSYNC		0x0008	/* do not return to freelist
				 * when removed from write queue;
				 */
#define lbmFREE		0x0010	/* return to freelist
				 * at completion of pageout;
				 * the buffer may be recycled;
				 */
#define	lbmDONE		0x0020
#define	lbmERROR	0x0040
#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
				 * of log page
				 */
#define lbmDIRECT	0x0100
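
/*
 * Flag combinations commonly used below (informational): a full log page
 * is queued with lbmWRITE | lbmRELEASE | lbmFREE (write, dequeue, recycle);
 * a group-commit pageout adds lbmGC so that lbmIODone() invokes lmPostGC();
 * writes the caller will wait on via lbmIOWait() use lbmWRITE | lbmSYNC.
 */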

/*
 * Global list of active external journals
 */
static LIST_HEAD(jfs_external_logs);
static struct jfs_log *dummy_log;
static DEFINE_MUTEX(jfs_log_mutex);

/*
 * forward references
 */
static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
			 struct lrd * lrd, struct tlock * tlck);

static int lmNextPage(struct jfs_log * log);
static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
			   int activate);

static int open_inline_log(struct super_block *sb);
static int open_dummy_log(struct super_block *sb);
static int lbmLogInit(struct jfs_log * log);
static void lbmLogShutdown(struct jfs_log * log);
static struct lbuf *lbmAllocate(struct jfs_log * log, int);
static void lbmFree(struct lbuf * bp);
static void lbmfree(struct lbuf * bp);
static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
static int lbmIOWait(struct lbuf * bp, int flag);
static bio_end_io_t lbmIODone;
static void lbmStartIO(struct lbuf * bp);
static void lmGCwrite(struct jfs_log * log, int cant_block);
static int lmLogSync(struct jfs_log * log, int hard_sync);



/*
 *	statistics
 */
#ifdef CONFIG_JFS_STATISTICS
static struct lmStat {
	uint commit;		/* # of commit */
	uint pagedone;		/* # of page written */
	uint submitted;		/* # of pages submitted */
	uint full_page;		/* # of full pages submitted */
	uint partial_page;	/* # of partial pages submitted */
} lmStat;
#endif
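
/*
 * Note: with CONFIG_JFS_STATISTICS these counters are exported read-only
 * through procfs (/proc/fs/jfs/lmstats in mainline kernels).
 */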

static void write_special_inodes(struct jfs_log *log,
				 int (*writer)(struct address_space *))
{
	struct jfs_sb_info *sbi;

	list_for_each_entry(sbi, &log->sb_list, log_list) {
		writer(sbi->ipbmap->i_mapping);
		writer(sbi->ipimap->i_mapping);
		writer(sbi->direct_inode->i_mapping);
	}
}

/*
 * NAME:	lmLog()
 *
 * FUNCTION:	write a log record;
 *
 * PARAMETER:	log	- log structure
 *		tblk	- transaction block
 *		lrd	- log record descriptor
 *		tlck	- transaction lock
 *
 * RETURN:	lsn - offset to the next log record to write (end-of-log);
 *		-1  - error;
 *
 * note: todo: log error handler
 */
int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	  struct tlock * tlck)
{
	int lsn;
	int diffp, difft;
	struct metapage *mp = NULL;
	unsigned long flags;

	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
		 log, tblk, lrd, tlck);

	LOG_LOCK(log);

	/* log by (out-of-transaction) JFS ? */
	if (tblk == NULL)
		goto writeRecord;

	/* log from page ? */
	if (tlck == NULL ||
	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
		goto writeRecord;

	/*
	 *	initialize/update page/transaction recovery lsn
	 */
	lsn = log->lsn;

	LOGSYNC_LOCK(log, flags);

	/*
	 * initialize page lsn if first log write of the page
	 */
	if (mp->lsn == 0) {
		mp->log = log;
		mp->lsn = lsn;
		log->count++;

		/* insert page at tail of logsynclist */
		list_add_tail(&mp->synclist, &log->synclist);
	}

	/*
	 *	initialize/update lsn of tblock of the page
	 *
	 * transaction inherits oldest lsn of pages associated
	 * with allocation/deallocation of resources (their
	 * log records are used to reconstruct allocation map
	 * at recovery time: inode for inode allocation map,
	 * B+-tree index of extent descriptors for block
	 * allocation map);
	 * allocation map pages inherit transaction lsn at
	 * commit time to allow forwarding log syncpt past log
	 * records associated with allocation/deallocation of
	 * resources only after persistent map of these map pages
	 * have been updated and propagated to home.
	 */
	/*
	 * initialize transaction lsn:
	 */
	if (tblk->lsn == 0) {
		/* inherit lsn of its first page logged */
		tblk->lsn = mp->lsn;
		log->count++;

		/* insert tblock after the page on logsynclist */
		list_add(&tblk->synclist, &mp->synclist);
	}
	/*
	 * update transaction lsn:
	 */
	else {
		/* inherit oldest/smallest lsn of page */
		logdiff(diffp, mp->lsn, log);
		logdiff(difft, tblk->lsn, log);
		if (diffp < difft) {
			/* update tblock lsn with page lsn */
			tblk->lsn = mp->lsn;

			/* move tblock after page on logsynclist */
			list_move(&tblk->synclist, &mp->synclist);
		}
	}

	LOGSYNC_UNLOCK(log, flags);

	/*
	 *	write the log record
	 */
      writeRecord:
	lsn = lmWriteRecord(log, tblk, lrd, tlck);

	/*
	 * forward log syncpt if log reached next syncpt trigger
	 */
	logdiff(diffp, lsn, log);
	if (diffp >= log->nextsync)
		lsn = lmLogSync(log, 0);

	/* update end-of-log lsn */
	log->lsn = lsn;

	LOG_UNLOCK(log);

	/* return end-of-log address */
	return lsn;
}
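
/*
 * A note on lsn arithmetic (informational): an lsn encodes a byte offset
 * into the circular log, (page << L2LOGPSIZE) + in-page offset (see
 * lmWriteRecord()), and logdiff() yields the distance written since the
 * last sync point, wrapped modulo the usable log size; that distance is
 * what the log->nextsync test in lmLog() compares against.
 */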

/*
 * NAME:	lmWriteRecord()
 *
 * FUNCTION:	move the log record to current log page
 *
 * PARAMETER:	log	- log structure
 *		tblk	- transaction block
 *		lrd	- log record descriptor
 *		tlck	- transaction lock
 *
 * RETURN:	end-of-log address
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int
lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	      struct tlock * tlck)
{
	int lsn = 0;		/* end-of-log address */
	struct lbuf *bp;	/* dst log page buffer */
	struct logpage *lp;	/* dst log page */
	caddr_t dst;		/* destination address in log page */
	int dstoffset;		/* end-of-log offset in log page */
	int freespace;		/* free space in log page */
	caddr_t p;		/* src meta-data page */
	caddr_t src;
	int srclen;
	int nbytes;		/* number of bytes to move */
	int i;
	int len;
	struct linelock *linelock;
	struct lv *lv;
	struct lvd *lvd;
	int l2linesize;

	len = 0;

	/* retrieve destination log page to write */
	bp = (struct lbuf *) log->bp;
	lp = (struct logpage *) bp->l_ldata;
	dstoffset = log->eor;

	/* any log data to write ? */
	if (tlck == NULL)
		goto moveLrd;

	/*
	 *	move log record data
	 */
	/* retrieve source meta-data page to log */
	if (tlck->flag & tlckPAGELOCK) {
		p = (caddr_t) (tlck->mp->data);
		linelock = (struct linelock *) & tlck->lock;
	}
	/* retrieve source in-memory inode to log */
	else if (tlck->flag & tlckINODELOCK) {
		if (tlck->type & tlckDTREE)
			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
		else
			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
		linelock = (struct linelock *) & tlck->lock;
	}
#ifdef	_JFS_WIP
	else if (tlck->flag & tlckINLINELOCK) {

		inlinelock = (struct inlinelock *) & tlck;
		p = (caddr_t) & inlinelock->pxd;
		linelock = (struct linelock *) & tlck;
	}
#endif				/* _JFS_WIP */
	else {
		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
		return 0;	/* Probably should trap */
	}
	l2linesize = linelock->l2linesize;

      moveData:
	ASSERT(linelock->index <= linelock->maxcnt);

	lv = linelock->lv;
	for (i = 0; i < linelock->index; i++, lv++) {
		if (lv->length == 0)
			continue;

		/* is page full ? */
		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
			/* page becomes full: move on to next page */
			lmNextPage(log);

			bp = log->bp;
			lp = (struct logpage *) bp->l_ldata;
			dstoffset = LOGPHDRSIZE;
		}

		/*
		 * move log vector data
		 */
		src = (u8 *) p + (lv->offset << l2linesize);
		srclen = lv->length << l2linesize;
		len += srclen;
		while (srclen > 0) {
			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
			nbytes = min(freespace, srclen);
			dst = (caddr_t) lp + dstoffset;
			memcpy(dst, src, nbytes);
			dstoffset += nbytes;

			/* is page not full ? */
			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
				break;

			/* page becomes full: move on to next page */
			lmNextPage(log);

			bp = (struct lbuf *) log->bp;
			lp = (struct logpage *) bp->l_ldata;
			dstoffset = LOGPHDRSIZE;

			srclen -= nbytes;
			src += nbytes;
		}

		/*
		 * move log vector descriptor
		 */
		len += 4;
		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
		lvd->offset = cpu_to_le16(lv->offset);
		lvd->length = cpu_to_le16(lv->length);
		dstoffset += 4;
		jfs_info("lmWriteRecord: lv offset:%d length:%d",
			 lv->offset, lv->length);
	}

	if ((i = linelock->next)) {
		linelock = (struct linelock *) lid_to_tlock(i);
		goto moveData;
	}

	/*
	 *	move log record descriptor
	 */
      moveLrd:
	lrd->length = cpu_to_le16(len);

	src = (caddr_t) lrd;
	srclen = LOGRDSIZE;

	while (srclen > 0) {
		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
		nbytes = min(freespace, srclen);
		dst = (caddr_t) lp + dstoffset;
		memcpy(dst, src, nbytes);

		dstoffset += nbytes;
		srclen -= nbytes;

		/* is there more to move than free space on the page ? */
		if (srclen)
			goto pageFull;

		/*
		 * end of log record descriptor
		 */

		/* update last log record eor */
		log->eor = dstoffset;
		bp->l_eor = dstoffset;
		lsn = (log->page << L2LOGPSIZE) + dstoffset;

		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
			tblk->clsn = lsn;
			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
				 bp->l_eor);

			INCREMENT(lmStat.commit);	/* # of commit */

			/*
			 * enqueue tblock for group commit:
			 *
			 * enqueue tblock of non-trivial/synchronous COMMIT
			 * at tail of group commit queue
			 * (trivial/asynchronous COMMITs are ignored by
			 * group commit.)
			 */
			LOGGC_LOCK(log);

			/* init tblock gc state */
			tblk->flag = tblkGC_QUEUE;
			tblk->bp = log->bp;
			tblk->pn = log->page;
			tblk->eor = log->eor;

			/* enqueue transaction to commit queue */
			list_add_tail(&tblk->cqueue, &log->cqueue);

			LOGGC_UNLOCK(log);
		}

		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);

		/* page not full ? */
		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
			return lsn;

	      pageFull:
		/* page becomes full: move on to next page */
		lmNextPage(log);

		bp = (struct lbuf *) log->bp;
		lp = (struct logpage *) bp->l_ldata;
		dstoffset = LOGPHDRSIZE;
		src += nbytes;
	}

	return lsn;
}


/*
 * NAME:	lmNextPage()
 *
 * FUNCTION:	write current page and allocate next page.
 *
 * PARAMETER:	log
 *
 * RETURN:	0
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int lmNextPage(struct jfs_log * log)
{
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	int pn;			/* current page number */
	struct lbuf *bp;
	struct lbuf *nextbp;
	struct tblock *tblk;

	/* get current log page number and log sequence page number */
	pn = log->page;
	bp = log->bp;
	lp = (struct logpage *) bp->l_ldata;
	lspn = le32_to_cpu(lp->h.page);

	LOGGC_LOCK(log);

	/*
	 *	write or queue the full page at the tail of write queue
	 */
	/* get the tail tblk on commit queue */
	if (list_empty(&log->cqueue))
		tblk = NULL;
	else
		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);

	/* every tblk that has a COMMIT record on the current page,
	 * and has not been committed, must be on the commit queue,
	 * since a tblk is queued at the commit queue at the time
	 * its COMMIT record is written to the page, before the
	 * page becomes full (even though the thread that wrote the
	 * COMMIT record may currently be suspended);
	 */

	/* is page bound with outstanding tail tblk ? */
	if (tblk && tblk->pn == pn) {
		/* mark tblk for end-of-page */
		tblk->flag |= tblkGC_EOP;

		if (log->cflag & logGC_PAGEOUT) {
			/* if page is not already on write queue,
			 * just enqueue (no lbmWRITE to prevent redrive)
			 * buffer to wqueue to ensure correct serial order
			 * of the pages since log pages will be added
			 * continuously
			 */
			if (bp->l_wqnext == NULL)
				lbmWrite(log, bp, 0, 0);
		} else {
			/*
			 * No current GC leader, initiate group commit
			 */
			log->cflag |= logGC_PAGEOUT;
			lmGCwrite(log, 0);
		}
	}
	/* page is not bound with outstanding tblk:
	 * init write or mark it to be redriven (lbmWRITE)
	 */
	else {
		/* finalize the page */
		bp->l_ceor = bp->l_eor;
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
	}
	LOGGC_UNLOCK(log);

	/*
	 *	allocate/initialize next page
	 */
	/* if log wraps, the first data page of log is 2
	 * (0 never used, 1 is superblock).
	 */
	log->page = (pn == log->size - 1) ? 2 : pn + 1;
	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */

	/* allocate/initialize next log page buffer */
	nextbp = lbmAllocate(log, log->page);
	nextbp->l_eor = log->eor;
	log->bp = nextbp;

	/* initialize next log page */
	lp = (struct logpage *) nextbp->l_ldata;
	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

	return 0;
}


/*
 * NAME:	lmGroupCommit()
 *
 * FUNCTION:	group commit
 *	initiate pageout of the pages with COMMIT in the order of
 *	page number - redrive pageout of the page at the head of
 *	pageout queue until full page has been written.
 *
 * RETURN:	0	- success
 *		-EIO	- group commit error
 *
 * NOTE:
 *	LOGGC_LOCK serializes log group commit queue, and
 *	transaction blocks on the commit queue.
 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 */
int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
{
	int rc = 0;

	LOGGC_LOCK(log);

	/* group committed already ? */
	if (tblk->flag & tblkGC_COMMITTED) {
		if (tblk->flag & tblkGC_ERROR)
			rc = -EIO;

		LOGGC_UNLOCK(log);
		return rc;
	}
	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);

	if (tblk->xflag & COMMIT_LAZY)
		tblk->flag |= tblkGC_LAZY;

	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
	     || jfs_tlocks_low)) {
		/*
		 * No pageout in progress
		 *
		 * start group commit as its group leader.
		 */
		log->cflag |= logGC_PAGEOUT;

		lmGCwrite(log, 0);
	}

	if (tblk->xflag & COMMIT_LAZY) {
		/*
		 * Lazy transactions can leave now
		 */
		LOGGC_UNLOCK(log);
		return 0;
	}

	/* lmGCwrite gives up LOGGC_LOCK, check again */

	if (tblk->flag & tblkGC_COMMITTED) {
		if (tblk->flag & tblkGC_ERROR)
			rc = -EIO;

		LOGGC_UNLOCK(log);
		return rc;
	}

	/* upcount transaction waiting for completion
	 */
	log->gcrtc++;
	tblk->flag |= tblkGC_READY;

	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));

	/* removed from commit queue */
	if (tblk->flag & tblkGC_ERROR)
		rc = -EIO;

	LOGGC_UNLOCK(log);
	return rc;
}
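
/*
 * Group commit state machine (informational summary): lmWriteRecord()
 * queues a tblk as tblkGC_QUEUE; lmGroupCommit() marks it tblkGC_READY
 * when its thread sleeps; lmGCwrite() promotes it to tblkGC_COMMIT when
 * its log page is shipped; lmPostGC() finally marks it tblkGC_COMMITTED
 * and wakes the waiter, or hands lazy transactions to txLazyUnlock().
 */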

/*
 * NAME:	lmGCwrite()
 *
 * FUNCTION:	group commit write
 *	initiate write of log page, building a group of all transactions
 *	with commit records on that page.
 *
 * RETURN:	None
 *
 * NOTE:
 *	LOGGC_LOCK must be held by caller.
 *	N.B. LOG_LOCK is NOT held during lmGCwrite().
 */
static void lmGCwrite(struct jfs_log * log, int cant_write)
{
	struct lbuf *bp;
	struct logpage *lp;
	int gcpn;		/* group commit page number */
	struct tblock *tblk;
	struct tblock *xtblk = NULL;

	/*
	 * build the commit group of a log page
	 *
	 * scan commit queue and make a commit group of all
	 * transactions with COMMIT records on the same log page.
	 */
	/* get the head tblk on the commit queue */
	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;

	list_for_each_entry(tblk, &log->cqueue, cqueue) {
		if (tblk->pn != gcpn)
			break;

		xtblk = tblk;

		/* state transition: (QUEUE, READY) -> COMMIT */
		tblk->flag |= tblkGC_COMMIT;
	}
	tblk = xtblk;		/* last tblk of the page */

	/*
	 * pageout to commit transactions on the log page.
	 */
	bp = (struct lbuf *) tblk->bp;
	lp = (struct logpage *) bp->l_ldata;
	/* is page already full ? */
	if (tblk->flag & tblkGC_EOP) {
		/* mark page to free at end of group commit of the page */
		tblk->flag &= ~tblkGC_EOP;
		tblk->flag |= tblkGC_FREE;
		bp->l_ceor = bp->l_eor;
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
			 cant_write);
		INCREMENT(lmStat.full_page);
	}
	/* page is not yet full */
	else {
		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
		INCREMENT(lmStat.partial_page);
	}
}

/*
 * NAME:	lmPostGC()
 *
 * FUNCTION:	group commit post-processing
 *	Processes transactions after their commit records have been written
 *	to disk, redriving log I/O if necessary.
 *
 * RETURN:	None
 *
 * NOTE:
 *	This routine is called at interrupt time by lbmIODone
 */
static void lmPostGC(struct lbuf * bp)
{
	unsigned long flags;
	struct jfs_log *log = bp->l_log;
	struct logpage *lp;
	struct tblock *tblk, *temp;

	//LOGGC_LOCK(log);
	spin_lock_irqsave(&log->gclock, flags);
	/*
	 * current pageout of group commit completed.
	 *
	 * remove/wakeup transactions from commit queue who were
	 * group committed with the current log page
	 */
	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
		if (!(tblk->flag & tblkGC_COMMIT))
			break;
		/* if transaction was marked GC_COMMIT then
		 * it has been shipped in the current pageout
		 * and made it to disk - it is committed.
		 */

		if (bp->l_flag & lbmERROR)
			tblk->flag |= tblkGC_ERROR;

		/* remove it from the commit queue */
		list_del(&tblk->cqueue);
		tblk->flag &= ~tblkGC_QUEUE;

		if (tblk == log->flush_tblk) {
			/* we can stop flushing the log now */
			clear_bit(log_FLUSH, &log->flag);
			log->flush_tblk = NULL;
		}

		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
			 tblk->flag);

		if (!(tblk->xflag & COMMIT_FORCE))
			/*
			 * Hand tblk over to lazy commit thread
			 */
			txLazyUnlock(tblk);
		else {
			/* state transition: COMMIT -> COMMITTED */
			tblk->flag |= tblkGC_COMMITTED;

			if (tblk->flag & tblkGC_READY)
				log->gcrtc--;

			LOGGC_WAKEUP(tblk);
		}

		/* was page full before pageout ?
		 * (and this is the last tblk bound with the page)
		 */
		if (tblk->flag & tblkGC_FREE)
			lbmFree(bp);
		/* did page become full after pageout ?
		 * (and this is the last tblk bound with the page)
		 */
		else if (tblk->flag & tblkGC_EOP) {
			/* finalize the page */
			lp = (struct logpage *) bp->l_ldata;
			bp->l_ceor = bp->l_eor;
			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
			jfs_info("lmPostGC: calling lbmWrite");
			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
				 1);
		}

	}

	/* are there any transactions who have entered lmGroupCommit()
	 * (whose COMMITs are after that of the last log page written)?
	 * They are waiting for new group commit (above at (SLEEP 1)),
	 * or lazy transactions are on a full (queued) log page;
	 * select the latest ready transaction as new group leader and
	 * wake her up to lead her group.
	 */
	if ((!list_empty(&log->cqueue)) &&
	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
		/*
		 * Call lmGCwrite with new group leader
		 */
		lmGCwrite(log, 1);

	/* no transactions are ready yet (transactions are only just
	 * queued (GC_QUEUE) and not entered for group commit yet).
	 * the first transaction entering group commit
	 * will elect herself as new group leader.
	 */
	else
		log->cflag &= ~logGC_PAGEOUT;

	//LOGGC_UNLOCK(log);
	spin_unlock_irqrestore(&log->gclock, flags);
	return;
}

/*
 * NAME:	lmLogSync()
 *
 * FUNCTION:	write log SYNCPT record for specified log
 *	if new sync address is available
 *	(normally the case if sync() is executed by background
 *	process).
 *	calculate new value of log->nextsync which determines when
 *	this code is called again.
 *
 * PARAMETERS:	log	- log structure
 *		hard_sync - 1 to force all metadata to be written
 *
 * RETURN:	lsn - log sync-point address
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int lmLogSync(struct jfs_log * log, int hard_sync)
{
	int logsize;
	int written;		/* written since last syncpt */
	int free;		/* free space left available */
	int delta;		/* additional delta to write normally */
	int more;		/* additional write granted */
	struct lrd lrd;
	int lsn;
	struct logsyncblk *lp;
	unsigned long flags;

	/* push dirty metapages out to disk */
	if (hard_sync)
		write_special_inodes(log, filemap_fdatawrite);
	else
		write_special_inodes(log, filemap_flush);

	/*
	 *	forward syncpt
	 */
	/* if last sync is same as last syncpt,
	 * invoke sync point forward processing to update sync.
	 */

	if (log->sync == log->syncpt) {
		LOGSYNC_LOCK(log, flags);
		if (list_empty(&log->synclist))
			log->sync = log->lsn;
		else {
			lp = list_entry(log->synclist.next,
					struct logsyncblk, synclist);
			log->sync = lp->lsn;
		}
		LOGSYNC_UNLOCK(log, flags);

	}

	/* if sync is different from last syncpt,
	 * write a SYNCPT record with syncpt = sync.
	 * reset syncpt = sync
	 */
	if (log->sync != log->syncpt) {
		lrd.logtid = 0;
		lrd.backchain = 0;
		lrd.type = cpu_to_le16(LOG_SYNCPT);
		lrd.length = 0;
		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
		lsn = lmWriteRecord(log, NULL, &lrd, NULL);

		log->syncpt = log->sync;
	} else
		lsn = log->lsn;

	/*
	 *	setup next syncpt trigger (SWAG)
	 */
	logsize = log->logsize;

	logdiff(written, lsn, log);
	free = logsize - written;
	delta = LOGSYNC_DELTA(logsize);
	more = min(free / 2, delta);
	if (more < 2 * LOGPSIZE) {
		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
		/*
		 *	log wrapping
		 *
		 * option 1 - panic ? No!
		 * option 2 - shutdown file systems
		 *	      associated with log ?
		 * option 3 - extend log ?
		 * option 4 - second chance
		 *
		 * mark log wrapped, and continue.
		 * when all active transactions are completed,
		 * mark log valid for recovery.
		 * if crashed during invalid state, log state
		 * implies invalid log, forcing fsck().
		 */
		/* mark log state log wrap in log superblock */
		/* log->state = LOGWRAP; */

		/* reset sync point computation */
		log->syncpt = log->sync = lsn;
		log->nextsync = delta;
	} else
		/* next syncpt trigger = written + more */
		log->nextsync = written + more;

	/* if number of bytes written from last sync point is more
	 * than 1/4 of the log size, stop new transactions from
	 * starting until all current transactions are completed
	 * by setting syncbarrier flag.
	 */
	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
		set_bit(log_SYNCBARRIER, &log->flag);
		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
			 log->syncpt);
		/*
		 * We may have to initiate group commit
		 */
		jfs_flush_journal(log, 0);
	}

	return lsn;
}

/*
 * NAME:	jfs_syncpt
 *
 * FUNCTION:	write log SYNCPT record for specified log
 *
 * PARAMETERS:	log	  - log structure
 *		hard_sync - set to 1 to force metadata to be written
 */
void jfs_syncpt(struct jfs_log *log, int hard_sync)
{
	LOG_LOCK(log);
	if (!test_bit(log_QUIESCE, &log->flag))
		lmLogSync(log, hard_sync);
	LOG_UNLOCK(log);
}

/*
 * NAME:	lmLogOpen()
 *
 * FUNCTION:	open the log on first open;
 *	insert filesystem in the active list of the log.
 *
 * PARAMETER:	sb	- super block
 *
 * RETURN:	0	- success
 *		errors from subroutines
 *
 * serialization: jfs_log_mutex
 */
int lmLogOpen(struct super_block *sb)
{
	int rc;
	struct block_device *bdev;
	struct jfs_log *log;
	struct jfs_sb_info *sbi = JFS_SBI(sb);

	if (sbi->flag & JFS_NOINTEGRITY)
		return open_dummy_log(sb);

	if (sbi->mntflag & JFS_INLINELOG)
		return open_inline_log(sb);

	mutex_lock(&jfs_log_mutex);
	list_for_each_entry(log, &jfs_external_logs, journal_list) {
		if (log->bdev->bd_dev == sbi->logdev) {
			if (memcmp(log->uuid, sbi->loguuid,
				   sizeof(log->uuid))) {
				jfs_warn("wrong uuid on JFS journal");
				mutex_unlock(&jfs_log_mutex);
				return -EINVAL;
			}
			/*
			 * add file system to log active file system list
			 */
			if ((rc = lmLogFileSystem(log, sbi, 1))) {
				mutex_unlock(&jfs_log_mutex);
				return rc;
			}
			goto journal_found;
		}
	}

	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
		mutex_unlock(&jfs_log_mutex);
		return -ENOMEM;
	}
	INIT_LIST_HEAD(&log->sb_list);
	init_waitqueue_head(&log->syncwait);

	/*
	 *	external log as separate logical volume
	 *
	 * file systems to log may have n-to-1 relationship;
	 */

	bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				 log);
	if (IS_ERR(bdev)) {
		rc = PTR_ERR(bdev);
		goto free;
	}

	log->bdev = bdev;
	memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));

	/*
	 * initialize log:
	 */
	if ((rc = lmLogInit(log)))
		goto close;

	list_add(&log->journal_list, &jfs_external_logs);

	/*
	 * add file system to log active file system list
	 */
	if ((rc = lmLogFileSystem(log, sbi, 1)))
		goto shutdown;

journal_found:
	LOG_LOCK(log);
	list_add(&sbi->log_list, &log->sb_list);
	sbi->log = log;
	LOG_UNLOCK(log);

	mutex_unlock(&jfs_log_mutex);
	return 0;

	/*
	 *	unwind on error
	 */
      shutdown:		/* unwind lbmLogInit() */
	list_del(&log->journal_list);
	lbmLogShutdown(log);

      close:		/* close external log device */
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

      free:		/* free log descriptor */
	mutex_unlock(&jfs_log_mutex);
	kfree(log);

	jfs_warn("lmLogOpen: exit(%d)", rc);
	return rc;
}

static int open_inline_log(struct super_block *sb)
{
	struct jfs_log *log;
	int rc;

	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
		return -ENOMEM;
	INIT_LIST_HEAD(&log->sb_list);
	init_waitqueue_head(&log->syncwait);

	set_bit(log_INLINELOG, &log->flag);
	log->bdev = sb->s_bdev;
	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
	    (L2LOGPSIZE - sb->s_blocksize_bits);
	log->l2bsize = sb->s_blocksize_bits;
	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);

	/*
	 * initialize log.
	 */
	if ((rc = lmLogInit(log))) {
		kfree(log);
		jfs_warn("lmLogOpen: exit(%d)", rc);
		return rc;
	}

	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
	JFS_SBI(sb)->log = log;

	return rc;
}

static int open_dummy_log(struct super_block *sb)
{
	int rc;

	mutex_lock(&jfs_log_mutex);
	if (!dummy_log) {
		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
		if (!dummy_log) {
			mutex_unlock(&jfs_log_mutex);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(&dummy_log->sb_list);
		init_waitqueue_head(&dummy_log->syncwait);
		dummy_log->no_integrity = 1;
		/* Make up some stuff */
		dummy_log->base = 0;
		dummy_log->size = 1024;
		rc = lmLogInit(dummy_log);
		if (rc) {
			kfree(dummy_log);
			dummy_log = NULL;
			mutex_unlock(&jfs_log_mutex);
			return rc;
		}
	}

	LOG_LOCK(dummy_log);
	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
	JFS_SBI(sb)->log = dummy_log;
	LOG_UNLOCK(dummy_log);
	mutex_unlock(&jfs_log_mutex);

	return 0;
}

/*
 * NAME:	lmLogInit()
 *
 * FUNCTION:	log initialization at first log open.
 *
 *	logredo() (or logformat()) should have been run previously.
 *	initialize the log from log superblock.
 *	set the log state in the superblock to LOGMOUNT and
 *	write SYNCPT log record.
 *
 * PARAMETER:	log	- log structure
 *
 * RETURN:	0	- if ok
 *		-EINVAL	- bad log magic number or superblock dirty
 *		errors returned from lbmIOWait()
 *
 * serialization: single first open thread
 */
int lmLogInit(struct jfs_log * log)
{
	int rc = 0;
	struct lrd lrd;
	struct logsuper *logsuper;
	struct lbuf *bpsuper;
	struct lbuf *bp;
	struct logpage *lp;
	int lsn = 0;

	jfs_info("lmLogInit: log:0x%p", log);

	/* initialize the group commit serialization lock */
	LOGGC_LOCK_INIT(log);

	/* allocate/initialize the log write serialization lock */
	LOG_LOCK_INIT(log);

	LOGSYNC_LOCK_INIT(log);

	INIT_LIST_HEAD(&log->synclist);

	INIT_LIST_HEAD(&log->cqueue);
	log->flush_tblk = NULL;

	log->count = 0;

	/*
	 * initialize log i/o
	 */
	if ((rc = lbmLogInit(log)))
		return rc;

	if (!test_bit(log_INLINELOG, &log->flag))
		log->l2bsize = L2LOGPSIZE;

	/* check for disabled journaling to disk */
	if (log->no_integrity) {
		/*
		 * Journal pages will still be filled.  When the time comes
		 * to actually do the I/O, the write is not done, and the
		 * endio routine is called directly.
		 */
		bp = lbmAllocate(log, 0);
		log->bp = bp;
		bp->l_pn = bp->l_eor = 0;
	} else {
		/*
		 * validate log superblock
		 */
		if ((rc = lbmRead(log, 1, &bpsuper)))
			goto errout10;

		logsuper = (struct logsuper *) bpsuper->l_ldata;

		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
			jfs_warn("*** Log Format Error ! ***");
			rc = -EINVAL;
			goto errout20;
		}

		/* logredo() should have been run successfully. */
		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
			jfs_warn("*** Log Is Dirty ! ***");
			rc = -EINVAL;
			goto errout20;
		}

		/* initialize log from log superblock */
		if (test_bit(log_INLINELOG, &log->flag)) {
			if (log->size != le32_to_cpu(logsuper->size)) {
				rc = -EINVAL;
				goto errout20;
			}
			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx "
				 "size:0x%x", log,
				 (unsigned long long) log->base, log->size);
		} else {
			if (memcmp(logsuper->uuid, log->uuid, 16)) {
				jfs_warn("wrong uuid on JFS log device");
				goto errout20;
			}
			log->size = le32_to_cpu(logsuper->size);
			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
			jfs_info("lmLogInit: external log:0x%p base:0x%Lx "
				 "size:0x%x", log,
				 (unsigned long long) log->base, log->size);
		}

		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);

		/*
		 * initialize for log append write mode
		 */
		/* establish current/end-of-log page/buffer */
		if ((rc = lbmRead(log, log->page, &bp)))
			goto errout20;

		lp = (struct logpage *) bp->l_ldata;

		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
			 le32_to_cpu(logsuper->end), log->page, log->eor,
			 le16_to_cpu(lp->h.eor));

		log->bp = bp;
		bp->l_pn = log->page;
		bp->l_eor = log->eor;

		/* if current page is full, move on to next page */
		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
			lmNextPage(log);

		/*
		 * initialize log syncpoint
		 */
		/*
		 * write the first SYNCPT record with syncpoint = 0
		 * (i.e., log redo up to HERE !);
		 * remove current page from lbm write queue at end of pageout
		 * (to write log superblock update), but do not release to
		 * freelist;
		 */
		lrd.logtid = 0;
		lrd.backchain = 0;
		lrd.type = cpu_to_le16(LOG_SYNCPT);
		lrd.length = 0;
		lrd.log.syncpt.sync = 0;
		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
		bp = log->bp;
		bp->l_ceor = bp->l_eor;
		lp = (struct logpage *) bp->l_ldata;
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
		if ((rc = lbmIOWait(bp, 0)))
			goto errout30;

		/*
		 * update/write superblock
		 */
		logsuper->state = cpu_to_le32(LOGMOUNT);
		log->serial = le32_to_cpu(logsuper->serial) + 1;
		logsuper->serial = cpu_to_le32(log->serial);
		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
			goto errout30;
	}

	/* initialize logsync parameters */
	log->logsize = (log->size - 2) << L2LOGPSIZE;
	log->lsn = lsn;
	log->syncpt = lsn;
	log->sync = log->syncpt;
	log->nextsync = LOGSYNC_DELTA(log->logsize);

	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
		 log->lsn, log->syncpt, log->sync);

	/*
	 * initialize for lazy/group commit
	 */
	log->clsn = lsn;

	return 0;

	/*
	 *	unwind on error
	 */
      errout30:		/* release log page */
	log->wqueue = NULL;
	bp->l_wqnext = NULL;
	lbmFree(bp);

      errout20:		/* release log superblock */
	lbmFree(bpsuper);

      errout10:		/* unwind lbmLogInit() */
	lbmLogShutdown(log);

	jfs_warn("lmLogInit: exit(%d)", rc);
	return rc;
}


/*
 * NAME:	lmLogClose()
 *
 * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
 *		and close it on last close.
 *
 * PARAMETER:	sb	- superblock
 *
 * RETURN:	errors from subroutines
 *
 * serialization: jfs_log_mutex
 */
int lmLogClose(struct super_block *sb)
{
	struct jfs_sb_info *sbi = JFS_SBI(sb);
	struct jfs_log *log = sbi->log;
	struct block_device *bdev;
	int rc = 0;

	jfs_info("lmLogClose: log:0x%p", log);

	mutex_lock(&jfs_log_mutex);
	LOG_LOCK(log);
	list_del(&sbi->log_list);
	LOG_UNLOCK(log);
	sbi->log = NULL;

	/*
	 * We need to make sure all of the "written" metapages
	 * actually make it to disk
	 */
	sync_blockdev(sb->s_bdev);

	if (test_bit(log_INLINELOG, &log->flag)) {
		/*
		 *	in-line log in host file system
		 */
		rc = lmLogShutdown(log);
		kfree(log);
		goto out;
	}

	if (!log->no_integrity)
		lmLogFileSystem(log, sbi, 0);

	if (!list_empty(&log->sb_list))
		goto out;

	/*
	 * TODO: ensure that the dummy_log is in a state to allow
	 * lbmLogShutdown to deallocate all the buffers and call
	 * kfree against dummy_log.  For now, leave dummy_log & its
	 * buffers in memory, and reuse if another no-integrity mount
	 * is requested.
	 */
	if (log->no_integrity)
		goto out;

	/*
	 *	external log as separate logical volume
	 */
	list_del(&log->journal_list);
	bdev = log->bdev;
	rc = lmLogShutdown(log);

	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

	kfree(log);

      out:
	mutex_unlock(&jfs_log_mutex);
	jfs_info("lmLogClose: exit(%d)", rc);
	return rc;
}


/*
 * NAME:	jfs_flush_journal()
 *
 * FUNCTION:	initiate write of any outstanding transactions to the journal
 *		and optionally wait until they are all written to disk
 *
 *		wait == 0  flush until latest txn is committed, don't wait
 *		wait == 1  flush until latest txn is committed, wait
 *		wait > 1   flush until all txn's are complete, wait
 */
void jfs_flush_journal(struct jfs_log *log, int wait)
{
	int i;
	struct tblock *target = NULL;

	/* jfs_write_inode may call us during read-only mount */
	if (!log)
		return;

	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);

	LOGGC_LOCK(log);

	if (!list_empty(&log->cqueue)) {
		/*
		 * This ensures that we will keep writing to the journal as long
		 * as there are unwritten commit records
		 */
		target = list_entry(log->cqueue.prev, struct tblock, cqueue);

		if (test_bit(log_FLUSH, &log->flag)) {
			/*
			 * We're already flushing.
			 * if flush_tblk is NULL, we are flushing everything,
			 * so leave it that way.  Otherwise, update it to the
			 * latest transaction
			 */
			if (log->flush_tblk)
				log->flush_tblk = target;
		} else {
			/* Only flush until latest transaction is committed */
			log->flush_tblk = target;
			set_bit(log_FLUSH, &log->flag);

			/*
			 * Initiate I/O on outstanding transactions
			 */
			if (!(log->cflag & logGC_PAGEOUT)) {
				log->cflag |= logGC_PAGEOUT;
				lmGCwrite(log, 0);
			}
		}
	}
	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
		/* Flush until all activity complete */
		set_bit(log_FLUSH, &log->flag);
		log->flush_tblk = NULL;
	}

	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
		DECLARE_WAITQUEUE(__wait, current);

		add_wait_queue(&target->gcwait, &__wait);
		set_current_state(TASK_UNINTERRUPTIBLE);
		LOGGC_UNLOCK(log);
		schedule();
		LOGGC_LOCK(log);
		remove_wait_queue(&target->gcwait, &__wait);
	}
	LOGGC_UNLOCK(log);

	if (wait < 2)
		return;

	write_special_inodes(log, filemap_fdatawrite);

	/*
	 * If there was recent activity, we may need to wait
	 * for the lazycommit thread to catch up
	 */
	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
		for (i = 0; i < 200; i++) {	/* Too much? */
			msleep(250);
			write_special_inodes(log, filemap_fdatawrite);
			if (list_empty(&log->cqueue) &&
			    list_empty(&log->synclist))
				break;
		}
	}
	assert(list_empty(&log->cqueue));

#ifdef CONFIG_JFS_DEBUG
	if (!list_empty(&log->synclist)) {
		struct logsyncblk *lp;

		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
		list_for_each_entry(lp, &log->synclist, synclist) {
			if (lp->xflag & COMMIT_PAGE) {
				struct metapage *mp = (struct metapage *)lp;
				print_hex_dump(KERN_ERR, "metapage: ",
					       DUMP_PREFIX_ADDRESS, 16, 4,
					       mp, sizeof(struct metapage), 0);
				print_hex_dump(KERN_ERR, "page: ",
					       DUMP_PREFIX_ADDRESS, 16,
					       sizeof(long), mp->page,
					       sizeof(struct page), 0);
			} else
				print_hex_dump(KERN_ERR, "tblock:",
					       DUMP_PREFIX_ADDRESS, 16, 4,
					       lp, sizeof(struct tblock), 0);
		}
	}
#else
	WARN_ON(!list_empty(&log->synclist));
#endif
	clear_bit(log_FLUSH, &log->flag);
}

/*
 * NAME:	lmLogShutdown()
 *
 * FUNCTION:	log shutdown at last LogClose().
 *
 *		write log syncpt record.
 *		update super block to set redone flag to 0.
 *
 * PARAMETER:	log	- log inode
 *
 * RETURN:	0	- success
 *
 * serialization: single last close thread
 */
int lmLogShutdown(struct jfs_log * log)
{
	int rc;
	struct lrd lrd;
	int lsn;
	struct logsuper *logsuper;
	struct lbuf *bpsuper;
	struct lbuf *bp;
	struct logpage *lp;

	jfs_info("lmLogShutdown: log:0x%p", log);

	jfs_flush_journal(log, 2);

	/*
	 * write the last SYNCPT record with syncpoint = 0
	 * (i.e., log redo up to HERE !)
	 */
	lrd.logtid = 0;
	lrd.backchain = 0;
	lrd.type = cpu_to_le16(LOG_SYNCPT);
	lrd.length = 0;
	lrd.log.syncpt.sync = 0;

	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
	bp = log->bp;
	lp = (struct logpage *) bp->l_ldata;
	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
	lbmIOWait(log->bp, lbmFREE);
	log->bp = NULL;

	/*
	 * synchronous update log superblock
	 * mark log state as shutdown cleanly
	 * (i.e., Log does not need to be replayed).
	 */
	if ((rc = lbmRead(log, 1, &bpsuper)))
		goto out;

	logsuper = (struct logsuper *) bpsuper->l_ldata;
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->end = cpu_to_le32(lsn);
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	rc = lbmIOWait(bpsuper, lbmFREE);

	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
		 lsn, log->page, log->eor);

      out:
	/*
	 * shutdown per log i/o
	 */
	lbmLogShutdown(log);

	if (rc) {
		jfs_warn("lmLogShutdown: exit(%d)", rc);
	}
	return rc;
}


/*
 * NAME:	lmLogFileSystem()
 *
 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
 *	file system into/from log active file system list.
 *
 * PARAMETER:	log	- log structure
 *		sbi	- superblock info of the file system
 *		activate - insert/remove device from active list.
 *
 * RETURN:	0	- success
 *		errors returned by lbmIOWait().
 */
static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
			   int activate)
{
	int rc = 0;
	int i;
	struct logsuper *logsuper;
	struct lbuf *bpsuper;
	char *uuid = sbi->uuid;

	/*
	 * insert/remove file system device to log active file system list.
	 */
	if ((rc = lbmRead(log, 1, &bpsuper)))
		return rc;

	logsuper = (struct logsuper *) bpsuper->l_ldata;
	if (activate) {
		for (i = 0; i < MAX_ACTIVE; i++)
			if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
				memcpy(logsuper->active[i].uuid, uuid, 16);
				sbi->aggregate = i;
				break;
			}
		if (i == MAX_ACTIVE) {
			jfs_warn("Too many file systems sharing journal!");
			lbmFree(bpsuper);
			return -EMFILE;	/* Is there a better rc? */
		}
	} else {
		for (i = 0; i < MAX_ACTIVE; i++)
			if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
				memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
				break;
			}
		if (i == MAX_ACTIVE) {
			jfs_warn("Somebody stomped on the journal!");
			lbmFree(bpsuper);
			return -EIO;
		}

	}

	/*
	 * synchronous write log superblock:
	 *
	 * write sidestream bypassing write queue:
	 * at file system mount, log super block is updated for
	 * activation of the file system before any log record
	 * (MOUNT record) of the file system, and at file system
	 * unmount, all meta data for the file system has been
	 * flushed before log super block is updated for deactivation
	 * of the file system.
	 */
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	rc = lbmIOWait(bpsuper, lbmFREE);

	return rc;
}

/*
 *		log buffer manager (lbm)
 *		------------------------
 *
 * special purpose buffer manager supporting log i/o requirements.
 *
 * per log write queue:
 * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
 * a circular singly-linked list
 * (log->wqueue points to the tail, and buffers are linked via
 * the bp->l_wqnext field), and
 * maintains log pages in pageout or waiting for pageout in serial order.
 */
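
/*
 * Write queue shape (sketch): log->wqueue points at the tail buffer and
 * the list is circular through l_wqnext, so tail->l_wqnext is the head,
 * i.e. the page currently being (or next to be) paged out:
 *
 *	head -> ... -> tail
 *	  ^-------------+	(tail->l_wqnext == head)
 *
 * See the insertion logic in lbmWrite() below.
 */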

/*
 *	lbmLogInit()
 *
 * initialize per log I/O setup at lmLogInit()
 */
static int lbmLogInit(struct jfs_log * log)
{				/* log inode */
	int i;
	struct lbuf *lbuf;

	jfs_info("lbmLogInit: log:0x%p", log);

	/* initialize current buffer cursor */
	log->bp = NULL;

	/* initialize log device write queue */
	log->wqueue = NULL;

	/*
	 * Each log has its own buffer pages allocated to it.  These are
	 * not managed by the page cache.  This ensures that a transaction
	 * writing to the log does not block trying to allocate a page from
	 * the page cache (for the log).  This would be bad, since page
	 * allocation waits on the kswapd thread that may be committing inodes
	 * which would cause log activity.  Was that clear?  I'm trying to
	 * avoid deadlock here.
	 */
	init_waitqueue_head(&log->free_wait);

	log->lbuf_free = NULL;

	for (i = 0; i < LOGPAGES;) {
		char *buffer;
		uint offset;
		struct page *page;

		buffer = (char *) get_zeroed_page(GFP_KERNEL);
		if (buffer == NULL)
			goto error;
		page = virt_to_page(buffer);
		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
			if (lbuf == NULL) {
				if (offset == 0)
					free_page((unsigned long) buffer);
				goto error;
			}
			if (offset) /* we already have one reference */
				get_page(page);
			lbuf->l_offset = offset;
			lbuf->l_ldata = buffer + offset;
			lbuf->l_page = page;
			lbuf->l_log = log;
			init_waitqueue_head(&lbuf->l_ioevent);

			lbuf->l_freelist = log->lbuf_free;
			log->lbuf_free = lbuf;
			i++;
		}
	}

	return (0);

      error:
	lbmLogShutdown(log);
	return -ENOMEM;
}


/*
 *	lbmLogShutdown()
 *
 * finalize per log I/O setup at lmLogShutdown()
 */
static void lbmLogShutdown(struct jfs_log * log)
{
	struct lbuf *lbuf;

	jfs_info("lbmLogShutdown: log:0x%p", log);

	lbuf = log->lbuf_free;
	while (lbuf) {
		struct lbuf *next = lbuf->l_freelist;
		__free_page(lbuf->l_page);
		kfree(lbuf);
		lbuf = next;
	}
}


/*
 *	lbmAllocate()
 *
 * allocate an empty log buffer
 */
static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
{
	struct lbuf *bp;
	unsigned long flags;

	/*
	 * recycle from log buffer freelist if any
	 */
	LCACHE_LOCK(flags);
	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
	log->lbuf_free = bp->l_freelist;
	LCACHE_UNLOCK(flags);

	bp->l_flag = 0;

	bp->l_wqnext = NULL;
	bp->l_freelist = NULL;

	bp->l_pn = pn;
	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
	bp->l_ceor = 0;

	return bp;
}


/*
 *	lbmFree()
 *
 * release a log buffer to freelist
 */
static void lbmFree(struct lbuf * bp)
{
	unsigned long flags;

	LCACHE_LOCK(flags);

	lbmfree(bp);

	LCACHE_UNLOCK(flags);
}

static void lbmfree(struct lbuf * bp)
{
	struct jfs_log *log = bp->l_log;

	assert(bp->l_wqnext == NULL);

	/*
	 * return the buffer to head of freelist
	 */
	bp->l_freelist = log->lbuf_free;
	log->lbuf_free = bp;

	wake_up(&log->free_wait);
	return;
}


/*
 * NAME:	lbmRedrive
 *
 * FUNCTION:	add a log buffer to the log redrive list
 *
 * PARAMETER:
 *	bp	- log buffer
 *
 * NOTES:
 *	Takes log_redrive_lock.
 */
static inline void lbmRedrive(struct lbuf *bp)
{
	unsigned long flags;

	spin_lock_irqsave(&log_redrive_lock, flags);
	bp->l_redrive_next = log_redrive_list;
	log_redrive_list = bp;
	spin_unlock_irqrestore(&log_redrive_lock, flags);

	wake_up_process(jfsIOthread);
}
1980
1981
1982/*
1983 *	lbmRead()
1984 */
1985static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1986{
1987	struct bio *bio;
1988	struct lbuf *bp;
1989
1990	/*
1991	 * allocate a log buffer
1992	 */
1993	*bpp = bp = lbmAllocate(log, pn);
1994	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1995
1996	bp->l_flag |= lbmREAD;
1997
1998	bio = bio_alloc(GFP_NOFS, 1);
1999
2000	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2001	bio->bi_bdev = log->bdev;
2002
2003	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2004	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2005
2006	bio->bi_end_io = lbmIODone;
2007	bio->bi_private = bp;
2008	/*check if journaling to disk has been disabled*/
2009	if (log->no_integrity) {
2010		bio->bi_iter.bi_size = 0;
2011		lbmIODone(bio);
2012	} else {
2013		submit_bio(READ_SYNC, bio);
2014	}
2015
2016	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2017
2018	return 0;
2019}
2020
2021
2022/*
2023 *	lbmWrite()
2024 *
2025 * buffer at head of pageout queue stays after completion of
2026 * partial-page pageout and redriven by explicit initiation of
2027 * pageout by caller until full-page pageout is completed and
2028 * released.
2029 *
2030 * device driver i/o done redrives pageout of new buffer at
2031 * head of pageout queue when current buffer at head of pageout
2032 * queue is released at the completion of its full-page pageout.
2033 *
2034 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2035 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2036 */
2037static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2038		     int cant_block)
2039{
2040	struct lbuf *tail;
2041	unsigned long flags;
2042
2043	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2044
2045	/* map the logical block address to physical block address */
2046	bp->l_blkno =
2047	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));

	LCACHE_LOCK(flags);		/* disable+lock */

	/*
	 * initialize buffer for device driver
	 */
	bp->l_flag = flag;

	/*
	 *	insert bp at tail of write queue associated with log
	 *
	 * (request is either for bp already/currently at head of queue
	 * or new bp to be inserted at tail)
	 */
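	/*
	 * the write queue is a circular, singly-linked list: log->wqueue
	 * points at the tail, and tail->l_wqnext is the head.  E.g., with
	 * buffers A (head), B and C (tail) queued, wqueue = C and
	 * C->l_wqnext = A; inserting D yields wqueue = D, C->l_wqnext = D
	 * and D->l_wqnext = A.
	 */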
	tail = log->wqueue;

	/* is buffer not already on write queue ? */
	if (bp->l_wqnext == NULL) {
		/* insert at tail of wqueue */
		if (tail == NULL) {
			log->wqueue = bp;
			bp->l_wqnext = bp;
		} else {
			log->wqueue = bp;
			bp->l_wqnext = tail->l_wqnext;
			tail->l_wqnext = bp;
		}

		tail = bp;
	}

	/* is buffer at head of wqueue and for write ? */
	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */
		return;
	}

	LCACHE_UNLOCK(flags);	/* unlock+enable */

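	/*
	 * start the i/o: a caller that must not block hands the buffer
	 * to the jfsIO thread; a synchronous write is submitted here
	 * directly; for an asynchronous write, LOGGC_LOCK (a spinlock)
	 * is dropped around submission since lbmStartIO() can sleep in
	 * bio_alloc().
	 */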
	if (cant_block)
		lbmRedrive(bp);
	else if (flag & lbmSYNC)
		lbmStartIO(bp);
	else {
		LOGGC_UNLOCK(log);
		lbmStartIO(bp);
		LOGGC_LOCK(log);
	}
}


/*
 *	lbmDirectWrite()
 *
 * initiate pageout bypassing write queue for sidestream
 * (e.g., log superblock) write;
 */
static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
{
	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
		 bp, flag, bp->l_pn);

	/*
	 * initialize buffer for device driver
	 */
	bp->l_flag = flag | lbmDIRECT;

	/* map the logical block address to physical block address */
	bp->l_blkno =
	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));

	/*
	 *	initiate pageout of the page
	 */
	lbmStartIO(bp);
}


/*
 * NAME:	lbmStartIO()
 *
 * FUNCTION:	Interface to DD strategy routine
 *
 * RETURN:	none
 *
 * serialization: LCACHE_LOCK() is NOT held during log i/o;
 */
static void lbmStartIO(struct lbuf * bp)
{
	struct bio *bio;
	struct jfs_log *log = bp->l_log;

	jfs_info("lbmStartIO");

	bio = bio_alloc(GFP_NOFS, 1);
	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
	bio->bi_bdev = log->bdev;

	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);

	bio->bi_end_io = lbmIODone;
	bio->bi_private = bp;

	/* check if journaling to disk has been disabled */
	if (log->no_integrity) {
		bio->bi_iter.bi_size = 0;
		lbmIODone(bio);
	} else {
		submit_bio(WRITE_SYNC, bio);
		INCREMENT(lmStat.submitted);
	}
}


/*
 *	lbmIOWait()
 */
static int lbmIOWait(struct lbuf * bp, int flag)
{
	unsigned long flags;
	int rc = 0;

	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);

	LCACHE_LOCK(flags);		/* disable+lock */

	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);

	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;

	if (flag & lbmFREE)
		lbmfree(bp);

	LCACHE_UNLOCK(flags);	/* unlock+enable */

	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
	return rc;
}

/*
 *	lbmIODone()
 *
 * executed at INTIODONE level
 */
static void lbmIODone(struct bio *bio)
{
	struct lbuf *bp = bio->bi_private;
	struct lbuf *nextbp, *tail;
	struct jfs_log *log;
	unsigned long flags;

	/*
	 * get back jfs buffer bound to the i/o buffer
	 */
	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);

	LCACHE_LOCK(flags);		/* disable+lock */

	bp->l_flag |= lbmDONE;

	if (bio->bi_error) {
		bp->l_flag |= lbmERROR;

		jfs_err("lbmIODone: I/O error in JFS log");
	}

	bio_put(bio);

	/*
	 *	pagein completion
	 */
	if (bp->l_flag & lbmREAD) {
		bp->l_flag &= ~lbmREAD;

		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);

		return;
	}

	/*
	 *	pageout completion
	 *
	 * the bp at the head of write queue has completed pageout.
	 *
	 * if single-commit/full-page pageout, remove the current buffer
	 * from head of pageout queue, and redrive pageout with
	 * the new buffer at head of pageout queue;
	 * otherwise, the partial-page pageout buffer stays at
	 * the head of pageout queue to be redriven for pageout
	 * by lmGroupCommit() until full-page pageout is completed.
	 */
	bp->l_flag &= ~lbmWRITE;
	INCREMENT(lmStat.pagedone);

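	/*
	 * an lsn encodes (log page number, byte offset within the page):
	 * lsn = (pn << L2LOGPSIZE) + eor.  E.g., assuming 4K log pages,
	 * an l_ceor of 0x58 on page 3 yields a committed lsn of 0x3058.
	 */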
	/* update committed lsn */
	log = bp->l_log;
	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;

	if (bp->l_flag & lbmDIRECT) {
		LCACHE_WAKEUP(&bp->l_ioevent);
		LCACHE_UNLOCK(flags);
		return;
	}

	tail = log->wqueue;

	/* single element queue */
	if (bp == tail) {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			log->wqueue = NULL;
			bp->l_wqnext = NULL;
		}
	}
	/* multi element queue */
	else {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			nextbp = tail->l_wqnext = bp->l_wqnext;
			bp->l_wqnext = NULL;

			/*
			 * redrive pageout of the next page at the head of
			 * the write queue: either a page without any bound
			 * tblk (i.e., a page with no COMMIT records), or
			 * the first page of a new group commit queued
			 * behind the current page by lmGroupCommit(), as
			 * indicated by the lbmWRITE flag (subsequent
			 * pageout is performed synchronously, except for
			 * pages without any COMMITs);
			 */
			if (nextbp->l_flag & lbmWRITE) {
				/*
				 * We can't do the I/O at interrupt time.
				 * The jfsIO thread can do it
				 */
				lbmRedrive(nextbp);
			}
		}
	}

	/*
	 *	synchronous pageout:
	 *
	 * buffer has not necessarily been removed from write queue
	 * (e.g., synchronous write of partial-page with COMMIT):
	 * leave buffer for i/o initiator to dispose
	 */
	if (bp->l_flag & lbmSYNC) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);
	}

	/*
	 *	Group Commit pageout:
	 */
	else if (bp->l_flag & lbmGC) {
		LCACHE_UNLOCK(flags);
		lmPostGC(bp);
	}

	/*
	 *	asynchronous pageout:
	 *
	 * buffer must have been removed from write queue:
	 * insert buffer at head of freelist where it can be recycled
	 */
	else {
		assert(bp->l_flag & lbmRELEASE);
		assert(bp->l_flag & lbmFREE);
		lbmfree(bp);

		LCACHE_UNLOCK(flags);	/* unlock+enable */
	}
}

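/*
 * NAME:	jfsIOWait()
 *
 * FUNCTION:	main loop of the jfsIO kernel thread: drain the redrive
 *		list filled by lbmRedrive() (typically from lbmIODone()
 *		at interrupt time), issuing the deferred i/o from process
 *		context, then sleep until woken again.
 */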
int jfsIOWait(void *arg)
{
	struct lbuf *bp;

	do {
		spin_lock_irq(&log_redrive_lock);
		while ((bp = log_redrive_list)) {
			log_redrive_list = bp->l_redrive_next;
			bp->l_redrive_next = NULL;
			spin_unlock_irq(&log_redrive_lock);
			lbmStartIO(bp);
			spin_lock_irq(&log_redrive_lock);
		}

		if (freezing(current)) {
			spin_unlock_irq(&log_redrive_lock);
			try_to_freeze();
		} else {
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irq(&log_redrive_lock);
			schedule();
		}
	} while (!kthread_should_stop());

	jfs_info("jfsIOWait being killed!");
	return 0;
}

/*
 * NAME:	lmLogFormat()/jfs_logform()
 *
 * FUNCTION:	format file system log
 *
 * PARAMETERS:
 *	log	- volume log
 *	logAddress - start address of log space in FS block
 *	logSize	- length of log space in FS block;
 *
 * RETURN:	0	- success
 *		-EIO	- i/o error
 *
 * XXX: We're synchronously writing one page at a time.  This needs to
 *	be improved by writing multiple pages at once.
 */
int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
{
	int rc = -EIO;
	struct jfs_sb_info *sbi;
	struct logsuper *logsuper;
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	struct lrd *lrd_ptr;
	int npages = 0;
	struct lbuf *bp;

	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
		 (long long)logAddress, logSize);

	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);

	/* allocate a log buffer */
	bp = lbmAllocate(log, 1);

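	/*
	 * logSize is in filesystem blocks and l2nbperpage is log2 of the
	 * blocks per 4K log page, so the shift below yields the page
	 * count (e.g., 2048 blocks of 4K -> 2048 pages; 2048 blocks of
	 * 512 bytes -> 256 pages).
	 */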
	npages = logSize >> sbi->l2nbperpage;

	/*
	 *	log space:
	 *
	 * page 0 - reserved;
	 * page 1 - log superblock;
	 * page 2 - log data page: a SYNCPT log record is written
	 *	    into this page at logform time;
	 * pages 3-N - log data page: set to empty log data pages;
	 */
	/*
	 *	init log superblock: log page 1
	 */
	logsuper = (struct logsuper *) bp->l_ldata;

	logsuper->magic = cpu_to_le32(LOGMAGIC);
	logsuper->version = cpu_to_le32(LOGVERSION);
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
	logsuper->size = cpu_to_le32(npages);
	logsuper->bsize = cpu_to_le32(sbi->bsize);
	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
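	/*
	 * end of log: byte offset just past the single SYNCPT record
	 * written into log page 2 below, i.e. the start of page 2
	 * (2 * LOGPSIZE) plus the page header plus one log record.
	 */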
	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);

	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	bp->l_blkno = logAddress + sbi->nbperpage;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 *	init pages 2 to npages-1 as log data pages:
	 *
	 * log page sequence number (lspn) initialization:
	 *
	 * pn:   0     1     2     3                 n-1
	 *       +-----+-----+=====+=====+===.....===+=====+
	 * lspn:             N-1   0     1           N-2
	 *                   <--- N page circular file ---->
	 *
	 * the N (= npages-2) data pages of the log are maintained as
	 * a circular file for the log records;
	 * lspn grows by 1 monotonically as each log page is written
	 * to the circular file of the log;
	 * since setLogpage() will not reset the page number even if
	 * the eor is equal to LOGPHDRSIZE, we have to simulate the
	 * log wrap situation at log format time for the binary search
	 * in the find-log-end process to still work.
	 * the 1st log page written will have the highest lspn (N-1);
	 * the succeeding log pages will have lspn in ascending order
	 * from 0 through N-2.
	 */
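	/*
	 * a worked example of the wrap simulation: with npages = 16,
	 * N = 14 data pages; the first data page written (pn 2, just
	 * below) gets lspn = npages - 3 = 13 = N-1, and the loop
	 * further down labels pages pn 3..15 with lspn 0..12 (= N-2).
	 */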
	lp = (struct logpage *) bp->l_ldata;
	/*
	 * initialize the 1st log page to be written: lspn = N - 1;
	 * a SYNCPT log record is written into this page
	 */
	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);

	lrd_ptr = (struct lrd *) &lp->data;
	lrd_ptr->logtid = 0;
	lrd_ptr->backchain = 0;
	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
	lrd_ptr->length = 0;
	lrd_ptr->log.syncpt.sync = 0;

	bp->l_blkno += sbi->nbperpage;
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 *	initialize succeeding log pages: lspn = 0, 1, ..., (N-2)
	 */
	for (lspn = 0; lspn < npages - 3; lspn++) {
		lp->h.page = lp->t.page = cpu_to_le32(lspn);
		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

		bp->l_blkno += sbi->nbperpage;
		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
		lbmStartIO(bp);
		if ((rc = lbmIOWait(bp, 0)))
			goto exit;
	}

	rc = 0;
exit:
	/*
	 *	finalize log
	 */
	/* release the buffer */
	lbmFree(bp);

	return rc;
}

#ifdef CONFIG_JFS_STATISTICS
static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m,
		       "JFS Logmgr stats\n"
		       "================\n"
		       "commits = %d\n"
		       "writes submitted = %d\n"
		       "writes completed = %d\n"
		       "full pages submitted = %d\n"
		       "partial pages submitted = %d\n",
		       lmStat.commit,
		       lmStat.submitted,
		       lmStat.pagedone,
		       lmStat.full_page,
		       lmStat.partial_page);
	return 0;
}

static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, jfs_lmstats_proc_show, NULL);
}

const struct file_operations jfs_lmstats_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= jfs_lmstats_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif /* CONFIG_JFS_STATISTICS */
