1/*
2 * fs/f2fs/segment.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 *             http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/bio.h>
14#include <linux/blkdev.h>
15#include <linux/prefetch.h>
16#include <linux/kthread.h>
17#include <linux/vmalloc.h>
18#include <linux/swap.h>
19
20#include "f2fs.h"
21#include "segment.h"
22#include "node.h"
23#include "trace.h"
24#include <trace/events/f2fs.h>
25
/*
 * Reversed-order "find first zero": runs __reverse_ffs() on the complement
 * of x, so the first clear bit of x is reported instead of the first set one.
 */
#define __reverse_ffz(x) __reverse_ffs(~(x))

/* slab cache used for struct discard_entry objects in this file */
static struct kmem_cache *discard_entry_slab;
/* NOTE(review): presumably backs SIT entry-set objects; no user is visible in this part of the file */
static struct kmem_cache *sit_entry_set_slab;
/* slab cache used for the struct inmem_pages entries of registered pages */
static struct kmem_cache *inmem_entry_slab;
31
/*
 * Find the first set bit of @word under the reversed in-byte numbering used
 * by f2fs_set_bit(), where the MSB of every byte is that byte's bit 0.
 * Derived from include/asm-generic/bitops/__ffs.h: empty low bytes are
 * skipped exactly as in __ffs(), but the search inside the first non-empty
 * byte runs from its high bit downwards.
 *
 * The callers visible in this file only pass a non-zero @word.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int pos = 0;

#if BITS_PER_LONG == 64
	if (!(word & 0xffffffff)) {
		word >>= 32;
		pos += 32;
	}
#endif
	if (!(word & 0xffff)) {
		word >>= 16;
		pos += 16;
	}
	if (!(word & 0xff)) {
		word >>= 8;
		pos += 8;
	}

	/* inside the byte the upper nibble carries the lower bit numbers */
	if (word & 0xf0)
		word >>= 4;
	else
		pos += 4;

	if (word & 0xc)
		word >>= 2;
	else
		pos += 2;

	if (!(word & 0x2))
		pos++;

	return pos;
}
66
67/*
68 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
69 * f2fs_set_bit makes MSB and LSB reversed in a byte.
70 * Example:
71 *                             LSB <--> MSB
72 *   f2fs_set_bit(0, bitmap) => 0000 0001
73 *   f2fs_set_bit(7, bitmap) => 1000 0000
74 */
75static unsigned long __find_rev_next_bit(const unsigned long *addr,
76			unsigned long size, unsigned long offset)
77{
78	const unsigned long *p = addr + BIT_WORD(offset);
79	unsigned long result = offset & ~(BITS_PER_LONG - 1);
80	unsigned long tmp;
81	unsigned long mask, submask;
82	unsigned long quot, rest;
83
84	if (offset >= size)
85		return size;
86
87	size -= result;
88	offset %= BITS_PER_LONG;
89	if (!offset)
90		goto aligned;
91
92	tmp = *(p++);
93	quot = (offset >> 3) << 3;
94	rest = offset & 0x7;
95	mask = ~0UL << quot;
96	submask = (unsigned char)(0xff << rest) >> rest;
97	submask <<= quot;
98	mask &= submask;
99	tmp &= mask;
100	if (size < BITS_PER_LONG)
101		goto found_first;
102	if (tmp)
103		goto found_middle;
104
105	size -= BITS_PER_LONG;
106	result += BITS_PER_LONG;
107aligned:
108	while (size & ~(BITS_PER_LONG-1)) {
109		tmp = *(p++);
110		if (tmp)
111			goto found_middle;
112		result += BITS_PER_LONG;
113		size -= BITS_PER_LONG;
114	}
115	if (!size)
116		return result;
117	tmp = *p;
118found_first:
119	tmp &= (~0UL >> (BITS_PER_LONG - size));
120	if (tmp == 0UL)		/* Are any bits set? */
121		return result + size;   /* Nope. */
122found_middle:
123	return result + __reverse_ffs(tmp);
124}
125
/*
 * Zero-bit counterpart of __find_rev_next_bit(): returns the index, in the
 * reversed in-byte numbering used by f2fs_set_bit(), of the first clear bit
 * at or after @offset in the @size-bit map @addr, or @size when no clear bit
 * is found.
 *
 * NOTE(review): the tail mask at found_first works on natural bit order, so
 * it looks exact only when the remaining size is a multiple of 8 - confirm
 * against the callers.
 */
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	/* index of the first bit of the word that contains @offset */
	unsigned long result = offset & ~(BITS_PER_LONG - 1);
	unsigned long tmp;
	unsigned long mask, submask;
	unsigned long quot, rest;

	if (offset >= size)
		return size;

	size -= result;
	offset %= BITS_PER_LONG;
	if (!offset)
		goto aligned;

	/*
	 * Unaligned start: force every bit that precedes @offset to one so it
	 * cannot be reported as a zero.  @mask covers all bytes below the
	 * start byte, @submask the top @rest bits of the start byte, which are
	 * its first @rest positions in reversed order.
	 */
	tmp = *(p++);
	quot = (offset >> 3) << 3;
	rest = offset & 0x7;
	mask = ~(~0UL << quot);
	submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
	submask <<= quot;
	mask += submask;
	tmp |= mask;
	if (size < BITS_PER_LONG)
		goto found_first;
	if (~tmp)
		goto found_middle;

	size -= BITS_PER_LONG;
	result += BITS_PER_LONG;
aligned:
	/* scan whole words until one contains a zero bit */
	while (size & ~(BITS_PER_LONG - 1)) {
		tmp = *(p++);
		if (~tmp)
			goto found_middle;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	tmp = *p;

found_first:
	/* partial last word: treat the bits beyond the end of the map as set */
	tmp |= ~0UL << size;
	if (tmp == ~0UL)        /* Are any bits zero? */
		return result + size;   /* Nope. */
found_middle:
	return result + __reverse_ffz(tmp);
}
177
178void register_inmem_page(struct inode *inode, struct page *page)
179{
180	struct f2fs_inode_info *fi = F2FS_I(inode);
181	struct inmem_pages *new;
182	int err;
183
184	SetPagePrivate(page);
185	f2fs_trace_pid(page);
186
187	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
188
189	/* add atomic page indices to the list */
190	new->page = page;
191	INIT_LIST_HEAD(&new->list);
192retry:
193	/* increase reference count with clean state */
194	mutex_lock(&fi->inmem_lock);
195	err = radix_tree_insert(&fi->inmem_root, page->index, new);
196	if (err == -EEXIST) {
197		mutex_unlock(&fi->inmem_lock);
198		kmem_cache_free(inmem_entry_slab, new);
199		return;
200	} else if (err) {
201		mutex_unlock(&fi->inmem_lock);
202		goto retry;
203	}
204	get_page(page);
205	list_add_tail(&new->list, &fi->inmem_pages);
206	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
207	mutex_unlock(&fi->inmem_lock);
208
209	trace_f2fs_register_inmem_page(page, INMEM);
210}
211
212void commit_inmem_pages(struct inode *inode, bool abort)
213{
214	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
215	struct f2fs_inode_info *fi = F2FS_I(inode);
216	struct inmem_pages *cur, *tmp;
217	bool submit_bio = false;
218	struct f2fs_io_info fio = {
219		.type = DATA,
220		.rw = WRITE_SYNC | REQ_PRIO,
221	};
222
223	/*
224	 * The abort is true only when f2fs_evict_inode is called.
225	 * Basically, the f2fs_evict_inode doesn't produce any data writes, so
226	 * that we don't need to call f2fs_balance_fs.
227	 * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
228	 * inode becomes free by iget_locked in f2fs_iget.
229	 */
230	if (!abort) {
231		f2fs_balance_fs(sbi);
232		f2fs_lock_op(sbi);
233	}
234
235	mutex_lock(&fi->inmem_lock);
236	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
237		if (!abort) {
238			lock_page(cur->page);
239			if (cur->page->mapping == inode->i_mapping) {
240				f2fs_wait_on_page_writeback(cur->page, DATA);
241				if (clear_page_dirty_for_io(cur->page))
242					inode_dec_dirty_pages(inode);
243				trace_f2fs_commit_inmem_page(cur->page, INMEM);
244				do_write_data_page(cur->page, &fio);
245				submit_bio = true;
246			}
247			f2fs_put_page(cur->page, 1);
248		} else {
249			trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
250			put_page(cur->page);
251		}
252		radix_tree_delete(&fi->inmem_root, cur->page->index);
253		list_del(&cur->list);
254		kmem_cache_free(inmem_entry_slab, cur);
255		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
256	}
257	mutex_unlock(&fi->inmem_lock);
258
259	if (!abort) {
260		f2fs_unlock_op(sbi);
261		if (submit_bio)
262			f2fs_submit_merged_bio(sbi, DATA, WRITE);
263	}
264}
265
266/*
267 * This function balances dirty node and dentry pages.
268 * In addition, it controls garbage collection.
269 */
270void f2fs_balance_fs(struct f2fs_sb_info *sbi)
271{
272	/*
273	 * We should do GC or end up with checkpoint, if there are so many dirty
274	 * dir/node pages without enough free segments.
275	 */
276	if (has_not_enough_free_secs(sbi, 0)) {
277		mutex_lock(&sbi->gc_mutex);
278		f2fs_gc(sbi);
279	}
280}
281
282void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
283{
284	/* try to shrink extent cache when there is no enough memory */
285	f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
286
287	/* check the # of cached NAT entries and prefree segments */
288	if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
289			excess_prefree_segs(sbi) ||
290			!available_free_memory(sbi, INO_ENTRIES))
291		f2fs_sync_fs(sbi->sb, true);
292}
293
294static int issue_flush_thread(void *data)
295{
296	struct f2fs_sb_info *sbi = data;
297	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
298	wait_queue_head_t *q = &fcc->flush_wait_queue;
299repeat:
300	if (kthread_should_stop())
301		return 0;
302
303	if (!llist_empty(&fcc->issue_list)) {
304		struct bio *bio = bio_alloc(GFP_NOIO, 0);
305		struct flush_cmd *cmd, *next;
306		int ret;
307
308		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
309		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
310
311		bio->bi_bdev = sbi->sb->s_bdev;
312		ret = submit_bio_wait(WRITE_FLUSH, bio);
313
314		llist_for_each_entry_safe(cmd, next,
315					  fcc->dispatch_list, llnode) {
316			cmd->ret = ret;
317			complete(&cmd->wait);
318		}
319		bio_put(bio);
320		fcc->dispatch_list = NULL;
321	}
322
323	wait_event_interruptible(*q,
324		kthread_should_stop() || !llist_empty(&fcc->issue_list));
325	goto repeat;
326}
327
328int f2fs_issue_flush(struct f2fs_sb_info *sbi)
329{
330	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
331	struct flush_cmd cmd;
332
333	trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
334					test_opt(sbi, FLUSH_MERGE));
335
336	if (test_opt(sbi, NOBARRIER))
337		return 0;
338
339	if (!test_opt(sbi, FLUSH_MERGE))
340		return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
341
342	init_completion(&cmd.wait);
343
344	llist_add(&cmd.llnode, &fcc->issue_list);
345
346	if (!fcc->dispatch_list)
347		wake_up(&fcc->flush_wait_queue);
348
349	wait_for_completion(&cmd.wait);
350
351	return cmd.ret;
352}
353
354int create_flush_cmd_control(struct f2fs_sb_info *sbi)
355{
356	dev_t dev = sbi->sb->s_bdev->bd_dev;
357	struct flush_cmd_control *fcc;
358	int err = 0;
359
360	fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
361	if (!fcc)
362		return -ENOMEM;
363	init_waitqueue_head(&fcc->flush_wait_queue);
364	init_llist_head(&fcc->issue_list);
365	SM_I(sbi)->cmd_control_info = fcc;
366	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
367				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
368	if (IS_ERR(fcc->f2fs_issue_flush)) {
369		err = PTR_ERR(fcc->f2fs_issue_flush);
370		kfree(fcc);
371		SM_I(sbi)->cmd_control_info = NULL;
372		return err;
373	}
374
375	return err;
376}
377
378void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
379{
380	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
381
382	if (fcc && fcc->f2fs_issue_flush)
383		kthread_stop(fcc->f2fs_issue_flush);
384	kfree(fcc);
385	SM_I(sbi)->cmd_control_info = NULL;
386}
387
388static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
389		enum dirty_type dirty_type)
390{
391	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
392
393	/* need not be added */
394	if (IS_CURSEG(sbi, segno))
395		return;
396
397	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
398		dirty_i->nr_dirty[dirty_type]++;
399
400	if (dirty_type == DIRTY) {
401		struct seg_entry *sentry = get_seg_entry(sbi, segno);
402		enum dirty_type t = sentry->type;
403
404		if (unlikely(t >= DIRTY)) {
405			f2fs_bug_on(sbi, 1);
406			return;
407		}
408		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
409			dirty_i->nr_dirty[t]++;
410	}
411}
412
413static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
414		enum dirty_type dirty_type)
415{
416	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
417
418	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
419		dirty_i->nr_dirty[dirty_type]--;
420
421	if (dirty_type == DIRTY) {
422		struct seg_entry *sentry = get_seg_entry(sbi, segno);
423		enum dirty_type t = sentry->type;
424
425		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
426			dirty_i->nr_dirty[t]--;
427
428		if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
429			clear_bit(GET_SECNO(sbi, segno),
430						dirty_i->victim_secmap);
431	}
432}
433
434/*
435 * Should not occur error such as -ENOMEM.
436 * Adding dirty entry into seglist is not critical operation.
437 * If a given segment is one of current working segments, it won't be added.
438 */
439static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
440{
441	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
442	unsigned short valid_blocks;
443
444	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
445		return;
446
447	mutex_lock(&dirty_i->seglist_lock);
448
449	valid_blocks = get_valid_blocks(sbi, segno, 0);
450
451	if (valid_blocks == 0) {
452		__locate_dirty_segment(sbi, segno, PRE);
453		__remove_dirty_segment(sbi, segno, DIRTY);
454	} else if (valid_blocks < sbi->blocks_per_seg) {
455		__locate_dirty_segment(sbi, segno, DIRTY);
456	} else {
457		/* Recovery routine with SSR needs this */
458		__remove_dirty_segment(sbi, segno, DIRTY);
459	}
460
461	mutex_unlock(&dirty_i->seglist_lock);
462}
463
464static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
465				block_t blkstart, block_t blklen)
466{
467	sector_t start = SECTOR_FROM_BLOCK(blkstart);
468	sector_t len = SECTOR_FROM_BLOCK(blklen);
469	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
470	return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
471}
472
473void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
474{
475	if (f2fs_issue_discard(sbi, blkaddr, 1)) {
476		struct page *page = grab_meta_page(sbi, blkaddr);
477		/* zero-filled page */
478		set_page_dirty(page);
479		f2fs_put_page(page, 1);
480	}
481}
482
483static void __add_discard_entry(struct f2fs_sb_info *sbi,
484		struct cp_control *cpc, unsigned int start, unsigned int end)
485{
486	struct list_head *head = &SM_I(sbi)->discard_list;
487	struct discard_entry *new, *last;
488
489	if (!list_empty(head)) {
490		last = list_last_entry(head, struct discard_entry, list);
491		if (START_BLOCK(sbi, cpc->trim_start) + start ==
492						last->blkaddr + last->len) {
493			last->len += end - start;
494			goto done;
495		}
496	}
497
498	new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
499	INIT_LIST_HEAD(&new->list);
500	new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
501	new->len = end - start;
502	list_add_tail(&new->list, head);
503done:
504	SM_I(sbi)->nr_discards += end - start;
505	cpc->trimmed += end - start;
506}
507
508static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
509{
510	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
511	int max_blocks = sbi->blocks_per_seg;
512	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
513	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
514	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
515	unsigned long *dmap = SIT_I(sbi)->tmp_map;
516	unsigned int start = 0, end = -1;
517	bool force = (cpc->reason == CP_DISCARD);
518	int i;
519
520	if (!force && (!test_opt(sbi, DISCARD) ||
521			SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards))
522		return;
523
524	if (force && !se->valid_blocks) {
525		struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
526		/*
527		 * if this segment is registered in the prefree list, then
528		 * we should skip adding a discard candidate, and let the
529		 * checkpoint do that later.
530		 */
531		mutex_lock(&dirty_i->seglist_lock);
532		if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) {
533			mutex_unlock(&dirty_i->seglist_lock);
534			cpc->trimmed += sbi->blocks_per_seg;
535			return;
536		}
537		mutex_unlock(&dirty_i->seglist_lock);
538
539		__add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg);
540		return;
541	}
542
543	/* zero block will be discarded through the prefree list */
544	if (!se->valid_blocks || se->valid_blocks == max_blocks)
545		return;
546
547	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
548	for (i = 0; i < entries; i++)
549		dmap[i] = force ? ~ckpt_map[i] :
550				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
551
552	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
553		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
554		if (start >= max_blocks)
555			break;
556
557		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
558
559		if (force && end - start < cpc->trim_minlen)
560			continue;
561
562		__add_discard_entry(sbi, cpc, start, end);
563	}
564}
565
566void release_discard_addrs(struct f2fs_sb_info *sbi)
567{
568	struct list_head *head = &(SM_I(sbi)->discard_list);
569	struct discard_entry *entry, *this;
570
571	/* drop caches */
572	list_for_each_entry_safe(entry, this, head, list) {
573		list_del(&entry->list);
574		kmem_cache_free(discard_entry_slab, entry);
575	}
576}
577
578/*
579 * Should call clear_prefree_segments after checkpoint is done.
580 */
581static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
582{
583	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
584	unsigned int segno;
585
586	mutex_lock(&dirty_i->seglist_lock);
587	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
588		__set_test_and_free(sbi, segno);
589	mutex_unlock(&dirty_i->seglist_lock);
590}
591
592void clear_prefree_segments(struct f2fs_sb_info *sbi)
593{
594	struct list_head *head = &(SM_I(sbi)->discard_list);
595	struct discard_entry *entry, *this;
596	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
597	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
598	unsigned int start = 0, end = -1;
599
600	mutex_lock(&dirty_i->seglist_lock);
601
602	while (1) {
603		int i;
604		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
605		if (start >= MAIN_SEGS(sbi))
606			break;
607		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
608								start + 1);
609
610		for (i = start; i < end; i++)
611			clear_bit(i, prefree_map);
612
613		dirty_i->nr_dirty[PRE] -= end - start;
614
615		if (!test_opt(sbi, DISCARD))
616			continue;
617
618		f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
619				(end - start) << sbi->log_blocks_per_seg);
620	}
621	mutex_unlock(&dirty_i->seglist_lock);
622
623	/* send small discards */
624	list_for_each_entry_safe(entry, this, head, list) {
625		f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
626		list_del(&entry->list);
627		SM_I(sbi)->nr_discards -= entry->len;
628		kmem_cache_free(discard_entry_slab, entry);
629	}
630}
631
632static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
633{
634	struct sit_info *sit_i = SIT_I(sbi);
635
636	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
637		sit_i->dirty_sentries++;
638		return false;
639	}
640
641	return true;
642}
643
644static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
645					unsigned int segno, int modified)
646{
647	struct seg_entry *se = get_seg_entry(sbi, segno);
648	se->type = type;
649	if (modified)
650		__mark_sit_entry_dirty(sbi, segno);
651}
652
653static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
654{
655	struct seg_entry *se;
656	unsigned int segno, offset;
657	long int new_vblocks;
658
659	segno = GET_SEGNO(sbi, blkaddr);
660
661	se = get_seg_entry(sbi, segno);
662	new_vblocks = se->valid_blocks + del;
663	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
664
665	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
666				(new_vblocks > sbi->blocks_per_seg)));
667
668	se->valid_blocks = new_vblocks;
669	se->mtime = get_mtime(sbi);
670	SIT_I(sbi)->max_mtime = se->mtime;
671
672	/* Update valid block bitmap */
673	if (del > 0) {
674		if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
675			f2fs_bug_on(sbi, 1);
676	} else {
677		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
678			f2fs_bug_on(sbi, 1);
679	}
680	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
681		se->ckpt_valid_blocks += del;
682
683	__mark_sit_entry_dirty(sbi, segno);
684
685	/* update total number of valid blocks to be written in ckpt area */
686	SIT_I(sbi)->written_valid_blocks += del;
687
688	if (sbi->segs_per_sec > 1)
689		get_sec_entry(sbi, segno)->valid_blocks += del;
690}
691
692void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
693{
694	update_sit_entry(sbi, new, 1);
695	if (GET_SEGNO(sbi, old) != NULL_SEGNO)
696		update_sit_entry(sbi, old, -1);
697
698	locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
699	locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
700}
701
702void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
703{
704	unsigned int segno = GET_SEGNO(sbi, addr);
705	struct sit_info *sit_i = SIT_I(sbi);
706
707	f2fs_bug_on(sbi, addr == NULL_ADDR);
708	if (addr == NEW_ADDR)
709		return;
710
711	/* add it into sit main buffer */
712	mutex_lock(&sit_i->sentry_lock);
713
714	update_sit_entry(sbi, addr, -1);
715
716	/* add it into dirty seglist */
717	locate_dirty_segment(sbi, segno);
718
719	mutex_unlock(&sit_i->sentry_lock);
720}
721
722/*
723 * This function should be resided under the curseg_mutex lock
724 */
725static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
726					struct f2fs_summary *sum)
727{
728	struct curseg_info *curseg = CURSEG_I(sbi, type);
729	void *addr = curseg->sum_blk;
730	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
731	memcpy(addr, sum, sizeof(struct f2fs_summary));
732}
733
734/*
735 * Calculate the number of current summary pages for writing
736 */
737int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
738{
739	int valid_sum_count = 0;
740	int i, sum_in_page;
741
742	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
743		if (sbi->ckpt->alloc_type[i] == SSR)
744			valid_sum_count += sbi->blocks_per_seg;
745		else {
746			if (for_ra)
747				valid_sum_count += le16_to_cpu(
748					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
749			else
750				valid_sum_count += curseg_blkoff(sbi, i);
751		}
752	}
753
754	sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
755			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
756	if (valid_sum_count <= sum_in_page)
757		return 1;
758	else if ((valid_sum_count - sum_in_page) <=
759		(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
760		return 2;
761	return 3;
762}
763
764/*
765 * Caller should put this summary page
766 */
767struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
768{
769	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
770}
771
772static void write_sum_page(struct f2fs_sb_info *sbi,
773			struct f2fs_summary_block *sum_blk, block_t blk_addr)
774{
775	struct page *page = grab_meta_page(sbi, blk_addr);
776	void *kaddr = page_address(page);
777	memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
778	set_page_dirty(page);
779	f2fs_put_page(page, 1);
780}
781
782static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
783{
784	struct curseg_info *curseg = CURSEG_I(sbi, type);
785	unsigned int segno = curseg->segno + 1;
786	struct free_segmap_info *free_i = FREE_I(sbi);
787
788	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
789		return !test_bit(segno, free_i->free_segmap);
790	return 0;
791}
792
/*
 * Find a new segment from the free segments bitmap to right order
 * This function should be returned with success, otherwise BUG
 *
 * @newseg:  in: the segment currently in use, used as the search hint;
 *           out: the segment that was picked and marked in use.
 * @new_sec: when false, the next free segment of the current section is
 *           preferred if there is one.
 * @dir:     ALLOC_RIGHT or ALLOC_LEFT; selects where the search continues
 *           once no free section is found at or after the hint.
 *
 * A clear bit in free_segmap/free_secmap is treated as "free" throughout.
 * The whole search runs under free_i->segmap_lock.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	/* section that contains the segment we are coming from */
	unsigned int hint = *newseg / sbi->segs_per_sec;
	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
	unsigned int left_start = hint;
	/* true until one zone has been rejected for being in use */
	bool init = true;
	int go_left = 0;
	int i;

	spin_lock(&free_i->segmap_lock);

	/*
	 * Fast path: unless a new section was requested or *newseg is the
	 * last segment of its section, take the next free segment if it still
	 * lies inside the current section.
	 */
	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
					MAIN_SEGS(sbi), *newseg + 1);
		if (segno - *newseg < sbi->segs_per_sec -
					(*newseg % sbi->segs_per_sec))
			goto got_it;
	}
find_other_zone:
	/* look for a free section at or after the hint */
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
		if (dir == ALLOC_RIGHT) {
			/* wrap around and search again from section 0 */
			secno = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
		} else {
			/* nothing to the right: walk leftwards from the hint */
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	/*
	 * Step left over in-use sections; on reaching section 0 without
	 * success, fall back to a forward search from the start.
	 */
	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		left_start = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	hint = secno;
	/* first segment of the chosen section, and the zone it lives in */
	segno = secno * sbi->segs_per_sec;
	zoneno = secno / sbi->secs_per_zone;

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	/* does any current segment already work in this zone? */
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in user, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		/* only one retry: the next candidate is taken as it is */
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	spin_unlock(&free_i->segmap_lock);
}
885
886static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
887{
888	struct curseg_info *curseg = CURSEG_I(sbi, type);
889	struct summary_footer *sum_footer;
890
891	curseg->segno = curseg->next_segno;
892	curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
893	curseg->next_blkoff = 0;
894	curseg->next_segno = NULL_SEGNO;
895
896	sum_footer = &(curseg->sum_blk->footer);
897	memset(sum_footer, 0, sizeof(struct summary_footer));
898	if (IS_DATASEG(type))
899		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
900	if (IS_NODESEG(type))
901		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
902	__set_sit_entry_type(sbi, type, curseg->segno, modified);
903}
904
905/*
906 * Allocate a current working segment.
907 * This function always allocates a free segment in LFS manner.
908 */
909static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
910{
911	struct curseg_info *curseg = CURSEG_I(sbi, type);
912	unsigned int segno = curseg->segno;
913	int dir = ALLOC_LEFT;
914
915	write_sum_page(sbi, curseg->sum_blk,
916				GET_SUM_BLOCK(sbi, segno));
917	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
918		dir = ALLOC_RIGHT;
919
920	if (test_opt(sbi, NOHEAP))
921		dir = ALLOC_RIGHT;
922
923	get_new_segment(sbi, &segno, new_sec, dir);
924	curseg->next_segno = segno;
925	reset_curseg(sbi, type, 1);
926	curseg->alloc_type = LFS;
927}
928
929static void __next_free_blkoff(struct f2fs_sb_info *sbi,
930			struct curseg_info *seg, block_t start)
931{
932	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
933	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
934	unsigned long *target_map = SIT_I(sbi)->tmp_map;
935	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
936	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
937	int i, pos;
938
939	for (i = 0; i < entries; i++)
940		target_map[i] = ckpt_map[i] | cur_map[i];
941
942	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
943
944	seg->next_blkoff = pos;
945}
946
947/*
948 * If a segment is written by LFS manner, next block offset is just obtained
949 * by increasing the current block offset. However, if a segment is written by
950 * SSR manner, next block offset obtained by calling __next_free_blkoff
951 */
952static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
953				struct curseg_info *seg)
954{
955	if (seg->alloc_type == SSR)
956		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
957	else
958		seg->next_blkoff++;
959}
960
961/*
962 * This function always allocates a used segment(from dirty seglist) by SSR
963 * manner, so it should recover the existing segment information of valid blocks
964 */
965static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
966{
967	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
968	struct curseg_info *curseg = CURSEG_I(sbi, type);
969	unsigned int new_segno = curseg->next_segno;
970	struct f2fs_summary_block *sum_node;
971	struct page *sum_page;
972
973	write_sum_page(sbi, curseg->sum_blk,
974				GET_SUM_BLOCK(sbi, curseg->segno));
975	__set_test_and_inuse(sbi, new_segno);
976
977	mutex_lock(&dirty_i->seglist_lock);
978	__remove_dirty_segment(sbi, new_segno, PRE);
979	__remove_dirty_segment(sbi, new_segno, DIRTY);
980	mutex_unlock(&dirty_i->seglist_lock);
981
982	reset_curseg(sbi, type, 1);
983	curseg->alloc_type = SSR;
984	__next_free_blkoff(sbi, curseg, 0);
985
986	if (reuse) {
987		sum_page = get_sum_page(sbi, new_segno);
988		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
989		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
990		f2fs_put_page(sum_page, 1);
991	}
992}
993
994static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
995{
996	struct curseg_info *curseg = CURSEG_I(sbi, type);
997	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
998
999	if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
1000		return v_ops->get_victim(sbi,
1001				&(curseg)->next_segno, BG_GC, type, SSR);
1002
1003	/* For data segments, let's do SSR more intensively */
1004	for (; type >= CURSEG_HOT_DATA; type--)
1005		if (v_ops->get_victim(sbi, &(curseg)->next_segno,
1006						BG_GC, type, SSR))
1007			return 1;
1008	return 0;
1009}
1010
1011/*
1012 * flush out current segment and replace it with new segment
1013 * This function should be returned with success, otherwise BUG
1014 */
1015static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
1016						int type, bool force)
1017{
1018	struct curseg_info *curseg = CURSEG_I(sbi, type);
1019
1020	if (force)
1021		new_curseg(sbi, type, true);
1022	else if (type == CURSEG_WARM_NODE)
1023		new_curseg(sbi, type, false);
1024	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
1025		new_curseg(sbi, type, false);
1026	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
1027		change_curseg(sbi, type, true);
1028	else
1029		new_curseg(sbi, type, false);
1030
1031	stat_inc_seg_type(sbi, curseg);
1032}
1033
1034static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
1035{
1036	struct curseg_info *curseg = CURSEG_I(sbi, type);
1037	unsigned int old_segno;
1038
1039	old_segno = curseg->segno;
1040	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
1041	locate_dirty_segment(sbi, old_segno);
1042}
1043
1044void allocate_new_segments(struct f2fs_sb_info *sbi)
1045{
1046	int i;
1047
1048	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
1049		__allocate_new_segments(sbi, i);
1050}
1051
/* Segment allocation ops whose allocate_segment hook is allocate_segment_by_default(). */
static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};
1055
1056int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1057{
1058	__u64 start = F2FS_BYTES_TO_BLK(range->start);
1059	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
1060	unsigned int start_segno, end_segno;
1061	struct cp_control cpc;
1062
1063	if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) ||
1064						range->len < sbi->blocksize)
1065		return -EINVAL;
1066
1067	cpc.trimmed = 0;
1068	if (end <= MAIN_BLKADDR(sbi))
1069		goto out;
1070
1071	/* start/end segment number in main_area */
1072	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
1073	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
1074						GET_SEGNO(sbi, end);
1075	cpc.reason = CP_DISCARD;
1076	cpc.trim_minlen = F2FS_BYTES_TO_BLK(range->minlen);
1077
1078	/* do checkpoint to issue discard commands safely */
1079	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
1080		cpc.trim_start = start_segno;
1081		cpc.trim_end = min_t(unsigned int, rounddown(start_segno +
1082				BATCHED_TRIM_SEGMENTS(sbi),
1083				sbi->segs_per_sec) - 1, end_segno);
1084
1085		mutex_lock(&sbi->gc_mutex);
1086		write_checkpoint(sbi, &cpc);
1087		mutex_unlock(&sbi->gc_mutex);
1088	}
1089out:
1090	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
1091	return 0;
1092}
1093
1094static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
1095{
1096	struct curseg_info *curseg = CURSEG_I(sbi, type);
1097	if (curseg->next_blkoff < sbi->blocks_per_seg)
1098		return true;
1099	return false;
1100}
1101
1102static int __get_segment_type_2(struct page *page, enum page_type p_type)
1103{
1104	if (p_type == DATA)
1105		return CURSEG_HOT_DATA;
1106	else
1107		return CURSEG_HOT_NODE;
1108}
1109
1110static int __get_segment_type_4(struct page *page, enum page_type p_type)
1111{
1112	if (p_type == DATA) {
1113		struct inode *inode = page->mapping->host;
1114
1115		if (S_ISDIR(inode->i_mode))
1116			return CURSEG_HOT_DATA;
1117		else
1118			return CURSEG_COLD_DATA;
1119	} else {
1120		if (IS_DNODE(page) && is_cold_node(page))
1121			return CURSEG_WARM_NODE;
1122		else
1123			return CURSEG_COLD_NODE;
1124	}
1125}
1126
1127static int __get_segment_type_6(struct page *page, enum page_type p_type)
1128{
1129	if (p_type == DATA) {
1130		struct inode *inode = page->mapping->host;
1131
1132		if (S_ISDIR(inode->i_mode))
1133			return CURSEG_HOT_DATA;
1134		else if (is_cold_data(page) || file_is_cold(inode))
1135			return CURSEG_COLD_DATA;
1136		else
1137			return CURSEG_WARM_DATA;
1138	} else {
1139		if (IS_DNODE(page))
1140			return is_cold_node(page) ? CURSEG_WARM_NODE :
1141						CURSEG_HOT_NODE;
1142		else
1143			return CURSEG_COLD_NODE;
1144	}
1145}
1146
1147static int __get_segment_type(struct page *page, enum page_type p_type)
1148{
1149	switch (F2FS_P_SB(page)->active_logs) {
1150	case 2:
1151		return __get_segment_type_2(page, p_type);
1152	case 4:
1153		return __get_segment_type_4(page, p_type);
1154	}
1155	/* NR_CURSEG_TYPE(6) logs by default */
1156	f2fs_bug_on(F2FS_P_SB(page),
1157		F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
1158	return __get_segment_type_6(page, p_type);
1159}
1160
1161void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1162		block_t old_blkaddr, block_t *new_blkaddr,
1163		struct f2fs_summary *sum, int type)
1164{
1165	struct sit_info *sit_i = SIT_I(sbi);
1166	struct curseg_info *curseg;
1167	bool direct_io = (type == CURSEG_DIRECT_IO);
1168
1169	type = direct_io ? CURSEG_WARM_DATA : type;
1170
1171	curseg = CURSEG_I(sbi, type);
1172
1173	mutex_lock(&curseg->curseg_mutex);
1174	mutex_lock(&sit_i->sentry_lock);
1175
1176	/* direct_io'ed data is aligned to the segment for better performance */
1177	if (direct_io && curseg->next_blkoff)
1178		__allocate_new_segments(sbi, type);
1179
1180	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
1181
1182	/*
1183	 * __add_sum_entry should be resided under the curseg_mutex
1184	 * because, this function updates a summary entry in the
1185	 * current summary block.
1186	 */
1187	__add_sum_entry(sbi, type, sum);
1188
1189	__refresh_next_blkoff(sbi, curseg);
1190
1191	stat_inc_block_count(sbi, curseg);
1192
1193	if (!__has_curseg_space(sbi, type))
1194		sit_i->s_ops->allocate_segment(sbi, type, false);
1195	/*
1196	 * SIT information should be updated before segment allocation,
1197	 * since SSR needs latest valid block information.
1198	 */
1199	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1200
1201	mutex_unlock(&sit_i->sentry_lock);
1202
1203	if (page && IS_NODESEG(type))
1204		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
1205
1206	mutex_unlock(&curseg->curseg_mutex);
1207}
1208
1209static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
1210			struct f2fs_summary *sum,
1211			struct f2fs_io_info *fio)
1212{
1213	int type = __get_segment_type(page, fio->type);
1214
1215	allocate_data_block(sbi, page, fio->blk_addr, &fio->blk_addr, sum, type);
1216
1217	/* writeout dirty page into bdev */
1218	f2fs_submit_page_mbio(sbi, page, fio);
1219}
1220
1221void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
1222{
1223	struct f2fs_io_info fio = {
1224		.type = META,
1225		.rw = WRITE_SYNC | REQ_META | REQ_PRIO,
1226		.blk_addr = page->index,
1227	};
1228
1229	set_page_writeback(page);
1230	f2fs_submit_page_mbio(sbi, page, &fio);
1231}
1232
1233void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
1234			unsigned int nid, struct f2fs_io_info *fio)
1235{
1236	struct f2fs_summary sum;
1237	set_summary(&sum, nid, 0, 0);
1238	do_write_page(sbi, page, &sum, fio);
1239}
1240
1241void write_data_page(struct page *page, struct dnode_of_data *dn,
1242				struct f2fs_io_info *fio)
1243{
1244	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1245	struct f2fs_summary sum;
1246	struct node_info ni;
1247
1248	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
1249	get_node_info(sbi, dn->nid, &ni);
1250	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1251	do_write_page(sbi, page, &sum, fio);
1252	dn->data_blkaddr = fio->blk_addr;
1253}
1254
/*
 * Submit @page to the address already stored in @fio, without allocating
 * a new block, and bump the in-place block statistic.
 */
void rewrite_data_page(struct page *page, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);

	stat_inc_inplace_blocks(sbi);
	f2fs_submit_page_mbio(sbi, page, fio);
}
1260
1261void recover_data_page(struct f2fs_sb_info *sbi,
1262			struct page *page, struct f2fs_summary *sum,
1263			block_t old_blkaddr, block_t new_blkaddr)
1264{
1265	struct sit_info *sit_i = SIT_I(sbi);
1266	struct curseg_info *curseg;
1267	unsigned int segno, old_cursegno;
1268	struct seg_entry *se;
1269	int type;
1270
1271	segno = GET_SEGNO(sbi, new_blkaddr);
1272	se = get_seg_entry(sbi, segno);
1273	type = se->type;
1274
1275	if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
1276		if (old_blkaddr == NULL_ADDR)
1277			type = CURSEG_COLD_DATA;
1278		else
1279			type = CURSEG_WARM_DATA;
1280	}
1281	curseg = CURSEG_I(sbi, type);
1282
1283	mutex_lock(&curseg->curseg_mutex);
1284	mutex_lock(&sit_i->sentry_lock);
1285
1286	old_cursegno = curseg->segno;
1287
1288	/* change the current segment */
1289	if (segno != curseg->segno) {
1290		curseg->next_segno = segno;
1291		change_curseg(sbi, type, true);
1292	}
1293
1294	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1295	__add_sum_entry(sbi, type, sum);
1296
1297	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1298	locate_dirty_segment(sbi, old_cursegno);
1299
1300	mutex_unlock(&sit_i->sentry_lock);
1301	mutex_unlock(&curseg->curseg_mutex);
1302}
1303
1304static inline bool is_merged_page(struct f2fs_sb_info *sbi,
1305					struct page *page, enum page_type type)
1306{
1307	enum page_type btype = PAGE_TYPE_OF_BIO(type);
1308	struct f2fs_bio_info *io = &sbi->write_io[btype];
1309	struct bio_vec *bvec;
1310	int i;
1311
1312	down_read(&io->io_rwsem);
1313	if (!io->bio)
1314		goto out;
1315
1316	bio_for_each_segment_all(bvec, io->bio, i) {
1317		if (page == bvec->bv_page) {
1318			up_read(&io->io_rwsem);
1319			return true;
1320		}
1321	}
1322
1323out:
1324	up_read(&io->io_rwsem);
1325	return false;
1326}
1327
1328void f2fs_wait_on_page_writeback(struct page *page,
1329				enum page_type type)
1330{
1331	if (PageWriteback(page)) {
1332		struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1333
1334		if (is_merged_page(sbi, page, type))
1335			f2fs_submit_merged_bio(sbi, type, WRITE);
1336		wait_on_page_writeback(page);
1337	}
1338}
1339
1340static int read_compacted_summaries(struct f2fs_sb_info *sbi)
1341{
1342	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1343	struct curseg_info *seg_i;
1344	unsigned char *kaddr;
1345	struct page *page;
1346	block_t start;
1347	int i, j, offset;
1348
1349	start = start_sum_block(sbi);
1350
1351	page = get_meta_page(sbi, start++);
1352	kaddr = (unsigned char *)page_address(page);
1353
1354	/* Step 1: restore nat cache */
1355	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1356	memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
1357
1358	/* Step 2: restore sit cache */
1359	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1360	memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
1361						SUM_JOURNAL_SIZE);
1362	offset = 2 * SUM_JOURNAL_SIZE;
1363
1364	/* Step 3: restore summary entries */
1365	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1366		unsigned short blk_off;
1367		unsigned int segno;
1368
1369		seg_i = CURSEG_I(sbi, i);
1370		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
1371		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
1372		seg_i->next_segno = segno;
1373		reset_curseg(sbi, i, 0);
1374		seg_i->alloc_type = ckpt->alloc_type[i];
1375		seg_i->next_blkoff = blk_off;
1376
1377		if (seg_i->alloc_type == SSR)
1378			blk_off = sbi->blocks_per_seg;
1379
1380		for (j = 0; j < blk_off; j++) {
1381			struct f2fs_summary *s;
1382			s = (struct f2fs_summary *)(kaddr + offset);
1383			seg_i->sum_blk->entries[j] = *s;
1384			offset += SUMMARY_SIZE;
1385			if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1386						SUM_FOOTER_SIZE)
1387				continue;
1388
1389			f2fs_put_page(page, 1);
1390			page = NULL;
1391
1392			page = get_meta_page(sbi, start++);
1393			kaddr = (unsigned char *)page_address(page);
1394			offset = 0;
1395		}
1396	}
1397	f2fs_put_page(page, 1);
1398	return 0;
1399}
1400
1401static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1402{
1403	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1404	struct f2fs_summary_block *sum;
1405	struct curseg_info *curseg;
1406	struct page *new;
1407	unsigned short blk_off;
1408	unsigned int segno = 0;
1409	block_t blk_addr = 0;
1410
1411	/* get segment number and block addr */
1412	if (IS_DATASEG(type)) {
1413		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1414		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1415							CURSEG_HOT_DATA]);
1416		if (__exist_node_summaries(sbi))
1417			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1418		else
1419			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
1420	} else {
1421		segno = le32_to_cpu(ckpt->cur_node_segno[type -
1422							CURSEG_HOT_NODE]);
1423		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1424							CURSEG_HOT_NODE]);
1425		if (__exist_node_summaries(sbi))
1426			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1427							type - CURSEG_HOT_NODE);
1428		else
1429			blk_addr = GET_SUM_BLOCK(sbi, segno);
1430	}
1431
1432	new = get_meta_page(sbi, blk_addr);
1433	sum = (struct f2fs_summary_block *)page_address(new);
1434
1435	if (IS_NODESEG(type)) {
1436		if (__exist_node_summaries(sbi)) {
1437			struct f2fs_summary *ns = &sum->entries[0];
1438			int i;
1439			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
1440				ns->version = 0;
1441				ns->ofs_in_node = 0;
1442			}
1443		} else {
1444			int err;
1445
1446			err = restore_node_summary(sbi, segno, sum);
1447			if (err) {
1448				f2fs_put_page(new, 1);
1449				return err;
1450			}
1451		}
1452	}
1453
1454	/* set uncompleted segment to curseg */
1455	curseg = CURSEG_I(sbi, type);
1456	mutex_lock(&curseg->curseg_mutex);
1457	memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
1458	curseg->next_segno = segno;
1459	reset_curseg(sbi, type, 0);
1460	curseg->alloc_type = ckpt->alloc_type[type];
1461	curseg->next_blkoff = blk_off;
1462	mutex_unlock(&curseg->curseg_mutex);
1463	f2fs_put_page(new, 1);
1464	return 0;
1465}
1466
1467static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1468{
1469	int type = CURSEG_HOT_DATA;
1470	int err;
1471
1472	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1473		int npages = npages_for_summary_flush(sbi, true);
1474
1475		if (npages >= 2)
1476			ra_meta_pages(sbi, start_sum_block(sbi), npages,
1477								META_CP);
1478
1479		/* restore for compacted data summary */
1480		if (read_compacted_summaries(sbi))
1481			return -EINVAL;
1482		type = CURSEG_HOT_NODE;
1483	}
1484
1485	if (__exist_node_summaries(sbi))
1486		ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
1487					NR_CURSEG_TYPE - type, META_CP);
1488
1489	for (; type <= CURSEG_COLD_NODE; type++) {
1490		err = read_normal_summaries(sbi, type);
1491		if (err)
1492			return err;
1493	}
1494
1495	return 0;
1496}
1497
1498static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1499{
1500	struct page *page;
1501	unsigned char *kaddr;
1502	struct f2fs_summary *summary;
1503	struct curseg_info *seg_i;
1504	int written_size = 0;
1505	int i, j;
1506
1507	page = grab_meta_page(sbi, blkaddr++);
1508	kaddr = (unsigned char *)page_address(page);
1509
1510	/* Step 1: write nat cache */
1511	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1512	memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
1513	written_size += SUM_JOURNAL_SIZE;
1514
1515	/* Step 2: write sit cache */
1516	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1517	memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
1518						SUM_JOURNAL_SIZE);
1519	written_size += SUM_JOURNAL_SIZE;
1520
1521	/* Step 3: write summary entries */
1522	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1523		unsigned short blkoff;
1524		seg_i = CURSEG_I(sbi, i);
1525		if (sbi->ckpt->alloc_type[i] == SSR)
1526			blkoff = sbi->blocks_per_seg;
1527		else
1528			blkoff = curseg_blkoff(sbi, i);
1529
1530		for (j = 0; j < blkoff; j++) {
1531			if (!page) {
1532				page = grab_meta_page(sbi, blkaddr++);
1533				kaddr = (unsigned char *)page_address(page);
1534				written_size = 0;
1535			}
1536			summary = (struct f2fs_summary *)(kaddr + written_size);
1537			*summary = seg_i->sum_blk->entries[j];
1538			written_size += SUMMARY_SIZE;
1539
1540			if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1541							SUM_FOOTER_SIZE)
1542				continue;
1543
1544			set_page_dirty(page);
1545			f2fs_put_page(page, 1);
1546			page = NULL;
1547		}
1548	}
1549	if (page) {
1550		set_page_dirty(page);
1551		f2fs_put_page(page, 1);
1552	}
1553}
1554
1555static void write_normal_summaries(struct f2fs_sb_info *sbi,
1556					block_t blkaddr, int type)
1557{
1558	int i, end;
1559	if (IS_DATASEG(type))
1560		end = type + NR_CURSEG_DATA_TYPE;
1561	else
1562		end = type + NR_CURSEG_NODE_TYPE;
1563
1564	for (i = type; i < end; i++) {
1565		struct curseg_info *sum = CURSEG_I(sbi, i);
1566		mutex_lock(&sum->curseg_mutex);
1567		write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1568		mutex_unlock(&sum->curseg_mutex);
1569	}
1570}
1571
1572void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1573{
1574	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1575		write_compacted_summaries(sbi, start_blk);
1576	else
1577		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1578}
1579
/*
 * Write the summary blocks of the node logs, starting with
 * CURSEG_HOT_NODE, to consecutive blocks beginning at @start_blk.
 */
void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
}
1584
1585int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1586					unsigned int val, int alloc)
1587{
1588	int i;
1589
1590	if (type == NAT_JOURNAL) {
1591		for (i = 0; i < nats_in_cursum(sum); i++) {
1592			if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1593				return i;
1594		}
1595		if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1596			return update_nats_in_cursum(sum, 1);
1597	} else if (type == SIT_JOURNAL) {
1598		for (i = 0; i < sits_in_cursum(sum); i++)
1599			if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1600				return i;
1601		if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1602			return update_sits_in_cursum(sum, 1);
1603	}
1604	return -1;
1605}
1606
1607static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1608					unsigned int segno)
1609{
1610	return get_meta_page(sbi, current_sit_addr(sbi, segno));
1611}
1612
1613static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1614					unsigned int start)
1615{
1616	struct sit_info *sit_i = SIT_I(sbi);
1617	struct page *src_page, *dst_page;
1618	pgoff_t src_off, dst_off;
1619	void *src_addr, *dst_addr;
1620
1621	src_off = current_sit_addr(sbi, start);
1622	dst_off = next_sit_addr(sbi, src_off);
1623
1624	/* get current sit block page without lock */
1625	src_page = get_meta_page(sbi, src_off);
1626	dst_page = grab_meta_page(sbi, dst_off);
1627	f2fs_bug_on(sbi, PageDirty(src_page));
1628
1629	src_addr = page_address(src_page);
1630	dst_addr = page_address(dst_page);
1631	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
1632
1633	set_page_dirty(dst_page);
1634	f2fs_put_page(src_page, 1);
1635
1636	set_to_next_sit(sit_i, start);
1637
1638	return dst_page;
1639}
1640
1641static struct sit_entry_set *grab_sit_entry_set(void)
1642{
1643	struct sit_entry_set *ses =
1644			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC);
1645
1646	ses->entry_cnt = 0;
1647	INIT_LIST_HEAD(&ses->set_list);
1648	return ses;
1649}
1650
/*
 * Unlink @ses from the list it is on and give it back to
 * sit_entry_set_slab.
 */
static void release_sit_entry_set(struct sit_entry_set *ses)
{
	list_del(&ses->set_list);
	kmem_cache_free(sit_entry_set_slab, ses);
}
1656
1657static void adjust_sit_entry_set(struct sit_entry_set *ses,
1658						struct list_head *head)
1659{
1660	struct sit_entry_set *next = ses;
1661
1662	if (list_is_last(&ses->set_list, head))
1663		return;
1664
1665	list_for_each_entry_continue(next, head, set_list)
1666		if (ses->entry_cnt <= next->entry_cnt)
1667			break;
1668
1669	list_move_tail(&ses->set_list, &next->set_list);
1670}
1671
1672static void add_sit_entry(unsigned int segno, struct list_head *head)
1673{
1674	struct sit_entry_set *ses;
1675	unsigned int start_segno = START_SEGNO(segno);
1676
1677	list_for_each_entry(ses, head, set_list) {
1678		if (ses->start_segno == start_segno) {
1679			ses->entry_cnt++;
1680			adjust_sit_entry_set(ses, head);
1681			return;
1682		}
1683	}
1684
1685	ses = grab_sit_entry_set();
1686
1687	ses->start_segno = start_segno;
1688	ses->entry_cnt++;
1689	list_add(&ses->set_list, head);
1690}
1691
1692static void add_sits_in_set(struct f2fs_sb_info *sbi)
1693{
1694	struct f2fs_sm_info *sm_info = SM_I(sbi);
1695	struct list_head *set_list = &sm_info->sit_entry_set;
1696	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
1697	unsigned int segno;
1698
1699	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
1700		add_sit_entry(segno, set_list);
1701}
1702
1703static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
1704{
1705	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1706	struct f2fs_summary_block *sum = curseg->sum_blk;
1707	int i;
1708
1709	for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1710		unsigned int segno;
1711		bool dirtied;
1712
1713		segno = le32_to_cpu(segno_in_journal(sum, i));
1714		dirtied = __mark_sit_entry_dirty(sbi, segno);
1715
1716		if (!dirtied)
1717			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
1718	}
1719	update_sits_in_cursum(sum, -sits_in_cursum(sum));
1720}
1721
1722/*
1723 * CP calls this function, which flushes SIT entries including sit_journal,
1724 * and moves prefree segs to free segs.
1725 */
1726void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1727{
1728	struct sit_info *sit_i = SIT_I(sbi);
1729	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1730	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1731	struct f2fs_summary_block *sum = curseg->sum_blk;
1732	struct sit_entry_set *ses, *tmp;
1733	struct list_head *head = &SM_I(sbi)->sit_entry_set;
1734	bool to_journal = true;
1735	struct seg_entry *se;
1736
1737	mutex_lock(&curseg->curseg_mutex);
1738	mutex_lock(&sit_i->sentry_lock);
1739
1740	if (!sit_i->dirty_sentries)
1741		goto out;
1742
1743	/*
1744	 * add and account sit entries of dirty bitmap in sit entry
1745	 * set temporarily
1746	 */
1747	add_sits_in_set(sbi);
1748
1749	/*
1750	 * if there are no enough space in journal to store dirty sit
1751	 * entries, remove all entries from journal and add and account
1752	 * them in sit entry set.
1753	 */
1754	if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1755		remove_sits_in_journal(sbi);
1756
1757	/*
1758	 * there are two steps to flush sit entries:
1759	 * #1, flush sit entries to journal in current cold data summary block.
1760	 * #2, flush sit entries to sit page.
1761	 */
1762	list_for_each_entry_safe(ses, tmp, head, set_list) {
1763		struct page *page = NULL;
1764		struct f2fs_sit_block *raw_sit = NULL;
1765		unsigned int start_segno = ses->start_segno;
1766		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
1767						(unsigned long)MAIN_SEGS(sbi));
1768		unsigned int segno = start_segno;
1769
1770		if (to_journal &&
1771			!__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
1772			to_journal = false;
1773
1774		if (!to_journal) {
1775			page = get_next_sit_page(sbi, start_segno);
1776			raw_sit = page_address(page);
1777		}
1778
1779		/* flush dirty sit entries in region of current sit set */
1780		for_each_set_bit_from(segno, bitmap, end) {
1781			int offset, sit_offset;
1782
1783			se = get_seg_entry(sbi, segno);
1784
1785			/* add discard candidates */
1786			if (cpc->reason != CP_DISCARD) {
1787				cpc->trim_start = segno;
1788				add_discard_addrs(sbi, cpc);
1789			}
1790
1791			if (to_journal) {
1792				offset = lookup_journal_in_cursum(sum,
1793							SIT_JOURNAL, segno, 1);
1794				f2fs_bug_on(sbi, offset < 0);
1795				segno_in_journal(sum, offset) =
1796							cpu_to_le32(segno);
1797				seg_info_to_raw_sit(se,
1798						&sit_in_journal(sum, offset));
1799			} else {
1800				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1801				seg_info_to_raw_sit(se,
1802						&raw_sit->entries[sit_offset]);
1803			}
1804
1805			__clear_bit(segno, bitmap);
1806			sit_i->dirty_sentries--;
1807			ses->entry_cnt--;
1808		}
1809
1810		if (!to_journal)
1811			f2fs_put_page(page, 1);
1812
1813		f2fs_bug_on(sbi, ses->entry_cnt);
1814		release_sit_entry_set(ses);
1815	}
1816
1817	f2fs_bug_on(sbi, !list_empty(head));
1818	f2fs_bug_on(sbi, sit_i->dirty_sentries);
1819out:
1820	if (cpc->reason == CP_DISCARD) {
1821		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
1822			add_discard_addrs(sbi, cpc);
1823	}
1824	mutex_unlock(&sit_i->sentry_lock);
1825	mutex_unlock(&curseg->curseg_mutex);
1826
1827	set_prefree_as_free_segments(sbi);
1828}
1829
1830static int build_sit_info(struct f2fs_sb_info *sbi)
1831{
1832	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1833	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1834	struct sit_info *sit_i;
1835	unsigned int sit_segs, start;
1836	char *src_bitmap, *dst_bitmap;
1837	unsigned int bitmap_size;
1838
1839	/* allocate memory for SIT information */
1840	sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1841	if (!sit_i)
1842		return -ENOMEM;
1843
1844	SM_I(sbi)->sit_info = sit_i;
1845
1846	sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry));
1847	if (!sit_i->sentries)
1848		return -ENOMEM;
1849
1850	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1851	sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1852	if (!sit_i->dirty_sentries_bitmap)
1853		return -ENOMEM;
1854
1855	for (start = 0; start < MAIN_SEGS(sbi); start++) {
1856		sit_i->sentries[start].cur_valid_map
1857			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1858		sit_i->sentries[start].ckpt_valid_map
1859			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1860		if (!sit_i->sentries[start].cur_valid_map
1861				|| !sit_i->sentries[start].ckpt_valid_map)
1862			return -ENOMEM;
1863	}
1864
1865	sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1866	if (!sit_i->tmp_map)
1867		return -ENOMEM;
1868
1869	if (sbi->segs_per_sec > 1) {
1870		sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
1871					sizeof(struct sec_entry));
1872		if (!sit_i->sec_entries)
1873			return -ENOMEM;
1874	}
1875
1876	/* get information related with SIT */
1877	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1878
1879	/* setup SIT bitmap from ckeckpoint pack */
1880	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1881	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1882
1883	dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
1884	if (!dst_bitmap)
1885		return -ENOMEM;
1886
1887	/* init SIT information */
1888	sit_i->s_ops = &default_salloc_ops;
1889
1890	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
1891	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
1892	sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
1893	sit_i->sit_bitmap = dst_bitmap;
1894	sit_i->bitmap_size = bitmap_size;
1895	sit_i->dirty_sentries = 0;
1896	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
1897	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
1898	sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
1899	mutex_init(&sit_i->sentry_lock);
1900	return 0;
1901}
1902
1903static int build_free_segmap(struct f2fs_sb_info *sbi)
1904{
1905	struct free_segmap_info *free_i;
1906	unsigned int bitmap_size, sec_bitmap_size;
1907
1908	/* allocate memory for free segmap information */
1909	free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
1910	if (!free_i)
1911		return -ENOMEM;
1912
1913	SM_I(sbi)->free_info = free_i;
1914
1915	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1916	free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1917	if (!free_i->free_segmap)
1918		return -ENOMEM;
1919
1920	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
1921	free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1922	if (!free_i->free_secmap)
1923		return -ENOMEM;
1924
1925	/* set all segments as dirty temporarily */
1926	memset(free_i->free_segmap, 0xff, bitmap_size);
1927	memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1928
1929	/* init free segmap information */
1930	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
1931	free_i->free_segments = 0;
1932	free_i->free_sections = 0;
1933	spin_lock_init(&free_i->segmap_lock);
1934	return 0;
1935}
1936
1937static int build_curseg(struct f2fs_sb_info *sbi)
1938{
1939	struct curseg_info *array;
1940	int i;
1941
1942	array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
1943	if (!array)
1944		return -ENOMEM;
1945
1946	SM_I(sbi)->curseg_array = array;
1947
1948	for (i = 0; i < NR_CURSEG_TYPE; i++) {
1949		mutex_init(&array[i].curseg_mutex);
1950		array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1951		if (!array[i].sum_blk)
1952			return -ENOMEM;
1953		array[i].segno = NULL_SEGNO;
1954		array[i].next_blkoff = 0;
1955	}
1956	return restore_curseg_summaries(sbi);
1957}
1958
1959static void build_sit_entries(struct f2fs_sb_info *sbi)
1960{
1961	struct sit_info *sit_i = SIT_I(sbi);
1962	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1963	struct f2fs_summary_block *sum = curseg->sum_blk;
1964	int sit_blk_cnt = SIT_BLK_CNT(sbi);
1965	unsigned int i, start, end;
1966	unsigned int readed, start_blk = 0;
1967	int nrpages = MAX_BIO_BLOCKS(sbi);
1968
1969	do {
1970		readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
1971
1972		start = start_blk * sit_i->sents_per_block;
1973		end = (start_blk + readed) * sit_i->sents_per_block;
1974
1975		for (; start < end && start < MAIN_SEGS(sbi); start++) {
1976			struct seg_entry *se = &sit_i->sentries[start];
1977			struct f2fs_sit_block *sit_blk;
1978			struct f2fs_sit_entry sit;
1979			struct page *page;
1980
1981			mutex_lock(&curseg->curseg_mutex);
1982			for (i = 0; i < sits_in_cursum(sum); i++) {
1983				if (le32_to_cpu(segno_in_journal(sum, i))
1984								== start) {
1985					sit = sit_in_journal(sum, i);
1986					mutex_unlock(&curseg->curseg_mutex);
1987					goto got_it;
1988				}
1989			}
1990			mutex_unlock(&curseg->curseg_mutex);
1991
1992			page = get_current_sit_page(sbi, start);
1993			sit_blk = (struct f2fs_sit_block *)page_address(page);
1994			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
1995			f2fs_put_page(page, 1);
1996got_it:
1997			check_block_count(sbi, start, &sit);
1998			seg_info_from_raw_sit(se, &sit);
1999			if (sbi->segs_per_sec > 1) {
2000				struct sec_entry *e = get_sec_entry(sbi, start);
2001				e->valid_blocks += se->valid_blocks;
2002			}
2003		}
2004		start_blk += readed;
2005	} while (start_blk < sit_blk_cnt);
2006}
2007
2008static void init_free_segmap(struct f2fs_sb_info *sbi)
2009{
2010	unsigned int start;
2011	int type;
2012
2013	for (start = 0; start < MAIN_SEGS(sbi); start++) {
2014		struct seg_entry *sentry = get_seg_entry(sbi, start);
2015		if (!sentry->valid_blocks)
2016			__set_free(sbi, start);
2017	}
2018
2019	/* set use the current segments */
2020	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
2021		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
2022		__set_test_and_inuse(sbi, curseg_t->segno);
2023	}
2024}
2025
2026static void init_dirty_segmap(struct f2fs_sb_info *sbi)
2027{
2028	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2029	struct free_segmap_info *free_i = FREE_I(sbi);
2030	unsigned int segno = 0, offset = 0;
2031	unsigned short valid_blocks;
2032
2033	while (1) {
2034		/* find dirty segment based on free segmap */
2035		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
2036		if (segno >= MAIN_SEGS(sbi))
2037			break;
2038		offset = segno + 1;
2039		valid_blocks = get_valid_blocks(sbi, segno, 0);
2040		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
2041			continue;
2042		if (valid_blocks > sbi->blocks_per_seg) {
2043			f2fs_bug_on(sbi, 1);
2044			continue;
2045		}
2046		mutex_lock(&dirty_i->seglist_lock);
2047		__locate_dirty_segment(sbi, segno, DIRTY);
2048		mutex_unlock(&dirty_i->seglist_lock);
2049	}
2050}
2051
2052static int init_victim_secmap(struct f2fs_sb_info *sbi)
2053{
2054	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2055	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2056
2057	dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
2058	if (!dirty_i->victim_secmap)
2059		return -ENOMEM;
2060	return 0;
2061}
2062
2063static int build_dirty_segmap(struct f2fs_sb_info *sbi)
2064{
2065	struct dirty_seglist_info *dirty_i;
2066	unsigned int bitmap_size, i;
2067
2068	/* allocate memory for dirty segments list information */
2069	dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
2070	if (!dirty_i)
2071		return -ENOMEM;
2072
2073	SM_I(sbi)->dirty_info = dirty_i;
2074	mutex_init(&dirty_i->seglist_lock);
2075
2076	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2077
2078	for (i = 0; i < NR_DIRTY_TYPE; i++) {
2079		dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
2080		if (!dirty_i->dirty_segmap[i])
2081			return -ENOMEM;
2082	}
2083
2084	init_dirty_segmap(sbi);
2085	return init_victim_secmap(sbi);
2086}
2087
2088/*
2089 * Update min, max modified time for cost-benefit GC algorithm
2090 */
2091static void init_min_max_mtime(struct f2fs_sb_info *sbi)
2092{
2093	struct sit_info *sit_i = SIT_I(sbi);
2094	unsigned int segno;
2095
2096	mutex_lock(&sit_i->sentry_lock);
2097
2098	sit_i->min_mtime = LLONG_MAX;
2099
2100	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
2101		unsigned int i;
2102		unsigned long long mtime = 0;
2103
2104		for (i = 0; i < sbi->segs_per_sec; i++)
2105			mtime += get_seg_entry(sbi, segno + i)->mtime;
2106
2107		mtime = div_u64(mtime, sbi->segs_per_sec);
2108
2109		if (sit_i->min_mtime > mtime)
2110			sit_i->min_mtime = mtime;
2111	}
2112	sit_i->max_mtime = get_mtime(sbi);
2113	mutex_unlock(&sit_i->sentry_lock);
2114}
2115
2116int build_segment_manager(struct f2fs_sb_info *sbi)
2117{
2118	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2119	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2120	struct f2fs_sm_info *sm_info;
2121	int err;
2122
2123	sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
2124	if (!sm_info)
2125		return -ENOMEM;
2126
2127	/* init sm info */
2128	sbi->sm_info = sm_info;
2129	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
2130	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
2131	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
2132	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
2133	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
2134	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
2135	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
2136	sm_info->rec_prefree_segments = sm_info->main_segments *
2137					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
2138	sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
2139	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2140	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
2141
2142	INIT_LIST_HEAD(&sm_info->discard_list);
2143	sm_info->nr_discards = 0;
2144	sm_info->max_discards = 0;
2145
2146	sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
2147
2148	INIT_LIST_HEAD(&sm_info->sit_entry_set);
2149
2150	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
2151		err = create_flush_cmd_control(sbi);
2152		if (err)
2153			return err;
2154	}
2155
2156	err = build_sit_info(sbi);
2157	if (err)
2158		return err;
2159	err = build_free_segmap(sbi);
2160	if (err)
2161		return err;
2162	err = build_curseg(sbi);
2163	if (err)
2164		return err;
2165
2166	/* reinit free segmap based on SIT */
2167	build_sit_entries(sbi);
2168
2169	init_free_segmap(sbi);
2170	err = build_dirty_segmap(sbi);
2171	if (err)
2172		return err;
2173
2174	init_min_max_mtime(sbi);
2175	return 0;
2176}
2177
2178static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
2179		enum dirty_type dirty_type)
2180{
2181	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2182
2183	mutex_lock(&dirty_i->seglist_lock);
2184	kfree(dirty_i->dirty_segmap[dirty_type]);
2185	dirty_i->nr_dirty[dirty_type] = 0;
2186	mutex_unlock(&dirty_i->seglist_lock);
2187}
2188
2189static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
2190{
2191	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2192	kfree(dirty_i->victim_secmap);
2193}
2194
2195static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
2196{
2197	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2198	int i;
2199
2200	if (!dirty_i)
2201		return;
2202
2203	/* discard pre-free/dirty segments list */
2204	for (i = 0; i < NR_DIRTY_TYPE; i++)
2205		discard_dirty_segmap(sbi, i);
2206
2207	destroy_victim_secmap(sbi);
2208	SM_I(sbi)->dirty_info = NULL;
2209	kfree(dirty_i);
2210}
2211
2212static void destroy_curseg(struct f2fs_sb_info *sbi)
2213{
2214	struct curseg_info *array = SM_I(sbi)->curseg_array;
2215	int i;
2216
2217	if (!array)
2218		return;
2219	SM_I(sbi)->curseg_array = NULL;
2220	for (i = 0; i < NR_CURSEG_TYPE; i++)
2221		kfree(array[i].sum_blk);
2222	kfree(array);
2223}
2224
2225static void destroy_free_segmap(struct f2fs_sb_info *sbi)
2226{
2227	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
2228	if (!free_i)
2229		return;
2230	SM_I(sbi)->free_info = NULL;
2231	kfree(free_i->free_segmap);
2232	kfree(free_i->free_secmap);
2233	kfree(free_i);
2234}
2235
2236static void destroy_sit_info(struct f2fs_sb_info *sbi)
2237{
2238	struct sit_info *sit_i = SIT_I(sbi);
2239	unsigned int start;
2240
2241	if (!sit_i)
2242		return;
2243
2244	if (sit_i->sentries) {
2245		for (start = 0; start < MAIN_SEGS(sbi); start++) {
2246			kfree(sit_i->sentries[start].cur_valid_map);
2247			kfree(sit_i->sentries[start].ckpt_valid_map);
2248		}
2249	}
2250	kfree(sit_i->tmp_map);
2251
2252	vfree(sit_i->sentries);
2253	vfree(sit_i->sec_entries);
2254	kfree(sit_i->dirty_sentries_bitmap);
2255
2256	SM_I(sbi)->sit_info = NULL;
2257	kfree(sit_i->sit_bitmap);
2258	kfree(sit_i);
2259}
2260
2261void destroy_segment_manager(struct f2fs_sb_info *sbi)
2262{
2263	struct f2fs_sm_info *sm_info = SM_I(sbi);
2264
2265	if (!sm_info)
2266		return;
2267	destroy_flush_cmd_control(sbi);
2268	destroy_dirty_segmap(sbi);
2269	destroy_curseg(sbi);
2270	destroy_free_segmap(sbi);
2271	destroy_sit_info(sbi);
2272	sbi->sm_info = NULL;
2273	kfree(sm_info);
2274}
2275
2276int __init create_segment_manager_caches(void)
2277{
2278	discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
2279			sizeof(struct discard_entry));
2280	if (!discard_entry_slab)
2281		goto fail;
2282
2283	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2284			sizeof(struct sit_entry_set));
2285	if (!sit_entry_set_slab)
2286		goto destory_discard_entry;
2287
2288	inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2289			sizeof(struct inmem_pages));
2290	if (!inmem_entry_slab)
2291		goto destroy_sit_entry_set;
2292	return 0;
2293
2294destroy_sit_entry_set:
2295	kmem_cache_destroy(sit_entry_set_slab);
2296destory_discard_entry:
2297	kmem_cache_destroy(discard_entry_slab);
2298fail:
2299	return -ENOMEM;
2300}
2301
2302void destroy_segment_manager_caches(void)
2303{
2304	kmem_cache_destroy(sit_entry_set_slab);
2305	kmem_cache_destroy(discard_entry_slab);
2306	kmem_cache_destroy(inmem_entry_slab);
2307}
2308