1/*
2 * fs/logfs/super.c
3 *
4 * As should be obvious for Linux kernel code, license is GPLv2
5 *
6 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
7 *
8 * Generally contains mount/umount code and also serves as a dump area for
9 * any functions that don't fit elsewhere and neither justify a file of their
10 * own.
11 */
12#include "logfs.h"
13#include <linux/bio.h>
14#include <linux/slab.h>
15#include <linux/blkdev.h>
16#include <linux/module.h>
17#include <linux/mtd/mtd.h>
18#include <linux/statfs.h>
19#include <linux/buffer_head.h>
20
/*
 * Module-wide fallback page.  It is allocated in logfs_init(), handed out
 * by emergency_read_begin() and given back through emergency_read_end().
 * emergency_mutex is held for as long as a caller owns the page.
 */
static struct page *emergency_page;
static DEFINE_MUTEX(emergency_mutex);
23
24struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index)
25{
26	filler_t *filler = (filler_t *)mapping->a_ops->readpage;
27	struct page *page;
28	int err;
29
30	page = read_cache_page(mapping, index, filler, NULL);
31	if (page)
32		return page;
33
34	/* No more pages available, switch to emergency page */
35	printk(KERN_INFO"Logfs: Using emergency page\n");
36	mutex_lock(&emergency_mutex);
37	err = filler(NULL, emergency_page);
38	if (err) {
39		mutex_unlock(&emergency_mutex);
40		printk(KERN_EMERG"Logfs: Error reading emergency page\n");
41		return ERR_PTR(err);
42	}
43	return emergency_page;
44}
45
46void emergency_read_end(struct page *page)
47{
48	if (page == emergency_page)
49		mutex_unlock(&emergency_mutex);
50	else
51		page_cache_release(page);
52}
53
54static void dump_segfile(struct super_block *sb)
55{
56	struct logfs_super *super = logfs_super(sb);
57	struct logfs_segment_entry se;
58	u32 segno;
59
60	for (segno = 0; segno < super->s_no_segs; segno++) {
61		logfs_get_segment_entry(sb, segno, &se);
62		printk("%3x: %6x %8x", segno, be32_to_cpu(se.ec_level),
63				be32_to_cpu(se.valid));
64		if (++segno < super->s_no_segs) {
65			logfs_get_segment_entry(sb, segno, &se);
66			printk(" %6x %8x", be32_to_cpu(se.ec_level),
67					be32_to_cpu(se.valid));
68		}
69		if (++segno < super->s_no_segs) {
70			logfs_get_segment_entry(sb, segno, &se);
71			printk(" %6x %8x", be32_to_cpu(se.ec_level),
72					be32_to_cpu(se.valid));
73		}
74		if (++segno < super->s_no_segs) {
75			logfs_get_segment_entry(sb, segno, &se);
76			printk(" %6x %8x", be32_to_cpu(se.ec_level),
77					be32_to_cpu(se.valid));
78		}
79		printk("\n");
80	}
81}
82
/*
 * logfs_crash_dump - dump debug information
 * @sb: superblock of the filesystem to report on
 *
 * The LogFS superblock only occupies part of a segment, and the stated
 * goal of this function is to write as much debug information as it can
 * gather into that spare space.  As implemented here it only prints the
 * segment file to the kernel log via dump_segfile(); nothing is written
 * to the device.
 */
void logfs_crash_dump(struct super_block *sb)
{
	dump_segfile(sb);
}
93
94/*
95 * FIXME: There should be a reserve for root, similar to ext2.
96 */
97int logfs_statfs(struct dentry *dentry, struct kstatfs *stats)
98{
99	struct super_block *sb = dentry->d_sb;
100	struct logfs_super *super = logfs_super(sb);
101
102	stats->f_type		= LOGFS_MAGIC_U32;
103	stats->f_bsize		= sb->s_blocksize;
104	stats->f_blocks		= super->s_size >> LOGFS_BLOCK_BITS >> 3;
105	stats->f_bfree		= super->s_free_bytes >> sb->s_blocksize_bits;
106	stats->f_bavail		= super->s_free_bytes >> sb->s_blocksize_bits;
107	stats->f_files		= 0;
108	stats->f_ffree		= 0;
109	stats->f_namelen	= LOGFS_MAX_NAMELEN;
110	return 0;
111}
112
113static int logfs_sb_set(struct super_block *sb, void *_super)
114{
115	struct logfs_super *super = _super;
116
117	sb->s_fs_info = super;
118	sb->s_mtd = super->s_mtd;
119	sb->s_bdev = super->s_bdev;
120#ifdef CONFIG_BLOCK
121	if (sb->s_bdev)
122		sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
123#endif
124#ifdef CONFIG_MTD
125	if (sb->s_mtd)
126		sb->s_bdi = sb->s_mtd->backing_dev_info;
127#endif
128	return 0;
129}
130
131static int logfs_sb_test(struct super_block *sb, void *_super)
132{
133	struct logfs_super *super = _super;
134	struct mtd_info *mtd = super->s_mtd;
135
136	if (mtd && sb->s_mtd == mtd)
137		return 1;
138	if (super->s_bdev && sb->s_bdev == super->s_bdev)
139		return 1;
140	return 0;
141}
142
143static void set_segment_header(struct logfs_segment_header *sh, u8 type,
144		u8 level, u32 segno, u32 ec)
145{
146	sh->pad = 0;
147	sh->type = type;
148	sh->level = level;
149	sh->segno = cpu_to_be32(segno);
150	sh->ec = cpu_to_be32(ec);
151	sh->gec = cpu_to_be64(segno);
152	sh->crc = logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4);
153}
154
155static void logfs_write_ds(struct super_block *sb, struct logfs_disk_super *ds,
156		u32 segno, u32 ec)
157{
158	struct logfs_super *super = logfs_super(sb);
159	struct logfs_segment_header *sh = &ds->ds_sh;
160	int i;
161
162	memset(ds, 0, sizeof(*ds));
163	set_segment_header(sh, SEG_SUPER, 0, segno, ec);
164
165	ds->ds_ifile_levels	= super->s_ifile_levels;
166	ds->ds_iblock_levels	= super->s_iblock_levels;
167	ds->ds_data_levels	= super->s_data_levels; /* XXX: Remove */
168	ds->ds_segment_shift	= super->s_segshift;
169	ds->ds_block_shift	= sb->s_blocksize_bits;
170	ds->ds_write_shift	= super->s_writeshift;
171	ds->ds_filesystem_size	= cpu_to_be64(super->s_size);
172	ds->ds_segment_size	= cpu_to_be32(super->s_segsize);
173	ds->ds_bad_seg_reserve	= cpu_to_be32(super->s_bad_seg_reserve);
174	ds->ds_feature_incompat	= cpu_to_be64(super->s_feature_incompat);
175	ds->ds_feature_ro_compat= cpu_to_be64(super->s_feature_ro_compat);
176	ds->ds_feature_compat	= cpu_to_be64(super->s_feature_compat);
177	ds->ds_feature_flags	= cpu_to_be64(super->s_feature_flags);
178	ds->ds_root_reserve	= cpu_to_be64(super->s_root_reserve);
179	ds->ds_speed_reserve	= cpu_to_be64(super->s_speed_reserve);
180	journal_for_each(i)
181		ds->ds_journal_seg[i] = cpu_to_be32(super->s_journal_seg[i]);
182	ds->ds_magic		= cpu_to_be64(LOGFS_MAGIC);
183	ds->ds_crc = logfs_crc32(ds, sizeof(*ds),
184			LOGFS_SEGMENT_HEADERSIZE + 12);
185}
186
187static int write_one_sb(struct super_block *sb,
188		struct page *(*find_sb)(struct super_block *sb, u64 *ofs))
189{
190	struct logfs_super *super = logfs_super(sb);
191	struct logfs_disk_super *ds;
192	struct logfs_segment_entry se;
193	struct page *page;
194	u64 ofs;
195	u32 ec, segno;
196	int err;
197
198	page = find_sb(sb, &ofs);
199	if (!page)
200		return -EIO;
201	ds = page_address(page);
202	segno = seg_no(sb, ofs);
203	logfs_get_segment_entry(sb, segno, &se);
204	ec = be32_to_cpu(se.ec_level) >> 4;
205	ec++;
206	logfs_set_segment_erased(sb, segno, ec, 0);
207	logfs_write_ds(sb, ds, segno, ec);
208	err = super->s_devops->write_sb(sb, page);
209	page_cache_release(page);
210	return err;
211}
212
213int logfs_write_sb(struct super_block *sb)
214{
215	struct logfs_super *super = logfs_super(sb);
216	int err;
217
218	/* First superblock */
219	err = write_one_sb(sb, super->s_devops->find_first_sb);
220	if (err)
221		return err;
222
223	/* Last superblock */
224	err = write_one_sb(sb, super->s_devops->find_last_sb);
225	if (err)
226		return err;
227	return 0;
228}
229
230static int ds_cmp(const void *ds0, const void *ds1)
231{
232	size_t len = sizeof(struct logfs_disk_super);
233
234	/* We know the segment headers differ, so ignore them */
235	len -= LOGFS_SEGMENT_HEADERSIZE;
236	ds0 += LOGFS_SEGMENT_HEADERSIZE;
237	ds1 += LOGFS_SEGMENT_HEADERSIZE;
238	return memcmp(ds0, ds1, len);
239}
240
241static int logfs_recover_sb(struct super_block *sb)
242{
243	struct logfs_super *super = logfs_super(sb);
244	struct logfs_disk_super _ds0, *ds0 = &_ds0;
245	struct logfs_disk_super _ds1, *ds1 = &_ds1;
246	int err, valid0, valid1;
247
248	/* read first superblock */
249	err = wbuf_read(sb, super->s_sb_ofs[0], sizeof(*ds0), ds0);
250	if (err)
251		return err;
252	/* read last superblock */
253	err = wbuf_read(sb, super->s_sb_ofs[1], sizeof(*ds1), ds1);
254	if (err)
255		return err;
256	valid0 = logfs_check_ds(ds0) == 0;
257	valid1 = logfs_check_ds(ds1) == 0;
258
259	if (!valid0 && valid1) {
260		printk(KERN_INFO"First superblock is invalid - fixing.\n");
261		return write_one_sb(sb, super->s_devops->find_first_sb);
262	}
263	if (valid0 && !valid1) {
264		printk(KERN_INFO"Last superblock is invalid - fixing.\n");
265		return write_one_sb(sb, super->s_devops->find_last_sb);
266	}
267	if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
268		printk(KERN_INFO"Superblocks don't match - fixing.\n");
269		return logfs_write_sb(sb);
270	}
271	/* If neither is valid now, something's wrong.  Didn't we properly
272	 * check them before?!? */
273	BUG_ON(!valid0 && !valid1);
274	return 0;
275}
276
/*
 * logfs_make_writeable - run the steps done before the mount finishes
 * @sb: filesystem being mounted
 *
 * Opens the segment file, repairs broken superblock copies, checks the
 * areas, runs one GC pass and finally replays the journal.  Returns 0 on
 * success or the error of the first step that failed.
 */
static int logfs_make_writeable(struct super_block *sb)
{
	int err;

	err = logfs_open_segfile(sb);
	if (err)
		return err;

	/* Repair any broken superblock copies */
	err = logfs_recover_sb(sb);
	if (err)
		return err;

	/* Check areas for trailing unaccounted data */
	err = logfs_check_areas(sb);
	if (err)
		return err;

	/* Do one GC pass before any data gets dirtied */
	logfs_gc_pass(sb);

	/*
	 * After all initializations are done, replay the journal
	 * for rw-mounts, if necessary.
	 */
	return logfs_replay_journal(sb);
}
306
307static int logfs_get_sb_final(struct super_block *sb)
308{
309	struct logfs_super *super = logfs_super(sb);
310	struct inode *rootdir;
311	int err;
312
313	/* root dir */
314	rootdir = logfs_iget(sb, LOGFS_INO_ROOT);
315	if (IS_ERR(rootdir))
316		goto fail;
317
318	sb->s_root = d_make_root(rootdir);
319	if (!sb->s_root)
320		goto fail;
321
322	/* at that point we know that ->put_super() will be called */
323	super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
324	if (!super->s_erase_page)
325		return -ENOMEM;
326	memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
327
328	/* FIXME: check for read-only mounts */
329	err = logfs_make_writeable(sb);
330	if (err) {
331		__free_page(super->s_erase_page);
332		return err;
333	}
334
335	log_super("LogFS: Finished mounting\n");
336	return 0;
337
338fail:
339	iput(super->s_master_inode);
340	iput(super->s_segfile_inode);
341	iput(super->s_mapping_inode);
342	return -EIO;
343}
344
345int logfs_check_ds(struct logfs_disk_super *ds)
346{
347	struct logfs_segment_header *sh = &ds->ds_sh;
348
349	if (ds->ds_magic != cpu_to_be64(LOGFS_MAGIC))
350		return -EINVAL;
351	if (sh->crc != logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4))
352		return -EINVAL;
353	if (ds->ds_crc != logfs_crc32(ds, sizeof(*ds),
354				LOGFS_SEGMENT_HEADERSIZE + 12))
355		return -EINVAL;
356	return 0;
357}
358
359static struct page *find_super_block(struct super_block *sb)
360{
361	struct logfs_super *super = logfs_super(sb);
362	struct page *first, *last;
363
364	first = super->s_devops->find_first_sb(sb, &super->s_sb_ofs[0]);
365	if (!first || IS_ERR(first))
366		return NULL;
367	last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
368	if (!last || IS_ERR(last)) {
369		page_cache_release(first);
370		return NULL;
371	}
372
373	if (!logfs_check_ds(page_address(first))) {
374		page_cache_release(last);
375		return first;
376	}
377
378	/* First one didn't work, try the second superblock */
379	if (!logfs_check_ds(page_address(last))) {
380		page_cache_release(first);
381		return last;
382	}
383
384	/* Neither worked, sorry folks */
385	page_cache_release(first);
386	page_cache_release(last);
387	return NULL;
388}
389
390static int __logfs_read_sb(struct super_block *sb)
391{
392	struct logfs_super *super = logfs_super(sb);
393	struct page *page;
394	struct logfs_disk_super *ds;
395	int i;
396
397	page = find_super_block(sb);
398	if (!page)
399		return -EINVAL;
400
401	ds = page_address(page);
402	super->s_size = be64_to_cpu(ds->ds_filesystem_size);
403	super->s_root_reserve = be64_to_cpu(ds->ds_root_reserve);
404	super->s_speed_reserve = be64_to_cpu(ds->ds_speed_reserve);
405	super->s_bad_seg_reserve = be32_to_cpu(ds->ds_bad_seg_reserve);
406	super->s_segsize = 1 << ds->ds_segment_shift;
407	super->s_segmask = (1 << ds->ds_segment_shift) - 1;
408	super->s_segshift = ds->ds_segment_shift;
409	sb->s_blocksize = 1 << ds->ds_block_shift;
410	sb->s_blocksize_bits = ds->ds_block_shift;
411	super->s_writesize = 1 << ds->ds_write_shift;
412	super->s_writeshift = ds->ds_write_shift;
413	super->s_no_segs = super->s_size >> super->s_segshift;
414	super->s_no_blocks = super->s_segsize >> sb->s_blocksize_bits;
415	super->s_feature_incompat = be64_to_cpu(ds->ds_feature_incompat);
416	super->s_feature_ro_compat = be64_to_cpu(ds->ds_feature_ro_compat);
417	super->s_feature_compat = be64_to_cpu(ds->ds_feature_compat);
418	super->s_feature_flags = be64_to_cpu(ds->ds_feature_flags);
419
420	journal_for_each(i)
421		super->s_journal_seg[i] = be32_to_cpu(ds->ds_journal_seg[i]);
422
423	super->s_ifile_levels = ds->ds_ifile_levels;
424	super->s_iblock_levels = ds->ds_iblock_levels;
425	super->s_data_levels = ds->ds_data_levels;
426	super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels
427		+ super->s_data_levels;
428	page_cache_release(page);
429	return 0;
430}
431
432static int logfs_read_sb(struct super_block *sb, int read_only)
433{
434	struct logfs_super *super = logfs_super(sb);
435	int ret;
436
437	super->s_btree_pool = mempool_create(32, btree_alloc, btree_free, NULL);
438	if (!super->s_btree_pool)
439		return -ENOMEM;
440
441	btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
442	btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
443	btree_init_mempool32(&super->s_shadow_tree.segment_map,
444			super->s_btree_pool);
445
446	ret = logfs_init_mapping(sb);
447	if (ret)
448		return ret;
449
450	ret = __logfs_read_sb(sb);
451	if (ret)
452		return ret;
453
454	if (super->s_feature_incompat & ~LOGFS_FEATURES_INCOMPAT)
455		return -EIO;
456	if ((super->s_feature_ro_compat & ~LOGFS_FEATURES_RO_COMPAT) &&
457			!read_only)
458		return -EIO;
459
460	ret = logfs_init_rw(sb);
461	if (ret)
462		return ret;
463
464	ret = logfs_init_areas(sb);
465	if (ret)
466		return ret;
467
468	ret = logfs_init_gc(sb);
469	if (ret)
470		return ret;
471
472	ret = logfs_init_journal(sb);
473	if (ret)
474		return ret;
475
476	return 0;
477}
478
479static void logfs_kill_sb(struct super_block *sb)
480{
481	struct logfs_super *super = logfs_super(sb);
482
483	log_super("LogFS: Start unmounting\n");
484	/* Alias entries slow down mount, so evict as many as possible */
485	sync_filesystem(sb);
486	logfs_write_anchor(sb);
487	free_areas(sb);
488
489	/*
490	 * From this point on alias entries are simply dropped - and any
491	 * writes to the object store are considered bugs.
492	 */
493	log_super("LogFS: Now in shutdown\n");
494	generic_shutdown_super(sb);
495	super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
496
497	BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes);
498
499	logfs_cleanup_gc(sb);
500	logfs_cleanup_journal(sb);
501	logfs_cleanup_areas(sb);
502	logfs_cleanup_rw(sb);
503	if (super->s_erase_page)
504		__free_page(super->s_erase_page);
505	super->s_devops->put_device(super);
506	logfs_mempool_destroy(super->s_btree_pool);
507	logfs_mempool_destroy(super->s_alias_pool);
508	kfree(super);
509	log_super("LogFS: Finished unmounting\n");
510}
511
512static struct dentry *logfs_get_sb_device(struct logfs_super *super,
513		struct file_system_type *type, int flags)
514{
515	struct super_block *sb;
516	int err = -ENOMEM;
517	static int mount_count;
518
519	log_super("LogFS: Start mount %x\n", mount_count++);
520
521	err = -EINVAL;
522	sb = sget(type, logfs_sb_test, logfs_sb_set, flags | MS_NOATIME, super);
523	if (IS_ERR(sb)) {
524		super->s_devops->put_device(super);
525		kfree(super);
526		return ERR_CAST(sb);
527	}
528
529	if (sb->s_root) {
530		/* Device is already in use */
531		super->s_devops->put_device(super);
532		kfree(super);
533		return dget(sb->s_root);
534	}
535
536	/*
537	 * sb->s_maxbytes is limited to 8TB.  On 32bit systems, the page cache
538	 * only covers 16TB and the upper 8TB are used for indirect blocks.
539	 * On 64bit system we could bump up the limit, but that would make
540	 * the filesystem incompatible with 32bit systems.
541	 */
542	sb->s_maxbytes	= (1ull << 43) - 1;
543	sb->s_max_links = LOGFS_LINK_MAX;
544	sb->s_op	= &logfs_super_operations;
545
546	err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY);
547	if (err)
548		goto err1;
549
550	sb->s_flags |= MS_ACTIVE;
551	err = logfs_get_sb_final(sb);
552	if (err) {
553		deactivate_locked_super(sb);
554		return ERR_PTR(err);
555	}
556	return dget(sb->s_root);
557
558err1:
559	/* no ->s_root, no ->put_super() */
560	iput(super->s_master_inode);
561	iput(super->s_segfile_inode);
562	iput(super->s_mapping_inode);
563	deactivate_locked_super(sb);
564	return ERR_PTR(err);
565}
566
567static struct dentry *logfs_mount(struct file_system_type *type, int flags,
568		const char *devname, void *data)
569{
570	ulong mtdnr;
571	struct logfs_super *super;
572	int err;
573
574	super = kzalloc(sizeof(*super), GFP_KERNEL);
575	if (!super)
576		return ERR_PTR(-ENOMEM);
577
578	mutex_init(&super->s_dirop_mutex);
579	mutex_init(&super->s_object_alias_mutex);
580	INIT_LIST_HEAD(&super->s_freeing_list);
581
582	if (!devname)
583		err = logfs_get_sb_bdev(super, type, devname);
584	else if (strncmp(devname, "mtd", 3))
585		err = logfs_get_sb_bdev(super, type, devname);
586	else {
587		char *garbage;
588		mtdnr = simple_strtoul(devname+3, &garbage, 0);
589		if (*garbage)
590			err = -EINVAL;
591		else
592			err = logfs_get_sb_mtd(super, mtdnr);
593	}
594
595	if (err) {
596		kfree(super);
597		return ERR_PTR(err);
598	}
599
600	return logfs_get_sb_device(super, type, flags);
601}
602
603static struct file_system_type logfs_fs_type = {
604	.owner		= THIS_MODULE,
605	.name		= "logfs",
606	.mount		= logfs_mount,
607	.kill_sb	= logfs_kill_sb,
608	.fs_flags	= FS_REQUIRES_DEV,
609
610};
611MODULE_ALIAS_FS("logfs");
612
613static int __init logfs_init(void)
614{
615	int ret;
616
617	emergency_page = alloc_pages(GFP_KERNEL, 0);
618	if (!emergency_page)
619		return -ENOMEM;
620
621	ret = logfs_compr_init();
622	if (ret)
623		goto out1;
624
625	ret = logfs_init_inode_cache();
626	if (ret)
627		goto out2;
628
629	ret = register_filesystem(&logfs_fs_type);
630	if (!ret)
631		return 0;
632	logfs_destroy_inode_cache();
633out2:
634	logfs_compr_exit();
635out1:
636	__free_pages(emergency_page, 0);
637	return ret;
638}
639
640static void __exit logfs_exit(void)
641{
642	unregister_filesystem(&logfs_fs_type);
643	logfs_destroy_inode_cache();
644	logfs_compr_exit();
645	__free_pages(emergency_page, 0);
646}
647
648module_init(logfs_init);
649module_exit(logfs_exit);
650
651MODULE_LICENSE("GPL v2");
652MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
653MODULE_DESCRIPTION("scalable flash filesystem");
654