1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/include/lustre_disk.h
37  *
38  * Lustre disk format definitions.
39  *
40  * Author: Nathan Rutman <nathan@clusterfs.com>
41  */
42 
43 #ifndef _LUSTRE_DISK_H
44 #define _LUSTRE_DISK_H
45 
46 /** \defgroup disk disk
47  *
48  * @{
49  */
50 
51 #include "../../include/linux/libcfs/libcfs.h"
52 #include "../../include/linux/lnet/types.h"
53 #include <linux/backing-dev.h>
54 
55 /****************** persistent mount data *********************/
56 
/* Server type bits, one per kind of server. */
#define LDD_F_SV_TYPE_MDT	0x0001
#define LDD_F_SV_TYPE_OST	0x0002
#define LDD_F_SV_TYPE_MGS	0x0004
/* Every LDD_F_SV_TYPE_* bit defined above, OR-ed together. */
#define LDD_F_SV_TYPE_MASK	(LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST | \
				 LDD_F_SV_TYPE_MGS)
/* Separate bit, not part of LDD_F_SV_TYPE_MASK. */
#define LDD_F_SV_ALL		0x0008
64 
65 /****************** mount command *********************/
66 
67 /* The lmd is only used internally by Lustre; mount simply passes
68    everything as string options */
69 
70 #define LMD_MAGIC    0xbdacbd03
71 
72 /* gleaned from the mount command - no persistent info here */
73 struct lustre_mount_data {
74 	__u32      lmd_magic;
75 	__u32      lmd_flags;	 /* lustre mount flags */
76 	int	lmd_mgs_failnodes; /* mgs failover node count */
77 	int	lmd_exclude_count;
78 	int	lmd_recovery_time_soft;
79 	int	lmd_recovery_time_hard;
80 	char      *lmd_dev;	   /* device name */
81 	char      *lmd_profile;       /* client only */
82 	char      *lmd_mgssec;	/* sptlrpc flavor to mgs */
83 	char      *lmd_opts;	  /* lustre mount options (as opposed to
84 					 _device_ mount options) */
85 	char      *lmd_params;	/* lustre params */
86 	__u32     *lmd_exclude;       /* array of OSTs to ignore */
87 	char	*lmd_mgs;	   /* MGS nid */
88 	char	*lmd_osd_type;      /* OSD type */
89 };
90 
/*
 * Bits tested against lustre_mount_data::lmd_flags (see lmd_is_client()).
 * 0x0004 is not assigned in this list.
 */
/* Mounting a server */
#define LMD_FLG_SERVER		0x0001
/* Mounting a client */
#define LMD_FLG_CLIENT		0x0002
/* Abort recovery */
#define LMD_FLG_ABORT_RECOV	0x0008
/* Only start MGS/MGC for servers, no other services */
#define LMD_FLG_NOSVC		0x0010
/* Only start target for servers, reusing existing MGS services */
#define LMD_FLG_NOMGS		0x0020
/* Rewrite config log */
#define LMD_FLG_WRITECONF	0x0040
/* NO imperative recovery */
#define LMD_FLG_NOIR		0x0080
/* Do not trigger scrub automatically */
#define LMD_FLG_NOSCRUB		0x0100
/* Also start MGS along with server */
#define LMD_FLG_MGS		0x0200
/* IAM dir */
#define LMD_FLG_IAM		0x0400
/* all nodes are service nodes */
#define LMD_FLG_NO_PRIMNODE	0x0800
/* the service registers first time */
#define LMD_FLG_VIRGIN		0x1000
/* update parameters */
#define LMD_FLG_UPDATE		0x2000
/* Start coordinator */
#define LMD_FLG_HSM		0x4000

/* Non-zero when LMD_FLG_CLIENT is set in (x)->lmd_flags. */
#define lmd_is_client(x)	((x)->lmd_flags & LMD_FLG_CLIENT)
109 
110 /****************** last_rcvd file *********************/
111 
/** version recovery epoch: number of low bits shifted out by lr_epoch() */
#define LR_EPOCH_BITS		32
/** \a a shifted right by LR_EPOCH_BITS */
#define lr_epoch(a)		((a) >> LR_EPOCH_BITS)
/** number of intervals to track transno */
#define LR_EXPIRE_INTERVALS	16
/** 'virtual' version of non-existent object */
#define ENOENT_VERSION		1

/* struct lr_server_data is padded out to LR_SERVER_SIZE bytes */
#define LR_SERVER_SIZE		512
#define LR_CLIENT_START		8192
/* struct lsd_client_data is padded out to LR_CLIENT_SIZE bytes */
#define LR_CLIENT_SIZE		128
/* LR_CLIENT_START must not be smaller than LR_SERVER_SIZE */
#if LR_SERVER_SIZE > LR_CLIENT_START
#error "Can't have LR_CLIENT_START < LR_SERVER_SIZE"
#endif
124 
/*
 * LR_MAX_CLIENTS is the larger of 128K (131072) and the number of bits in one
 * page (PAGE_CACHE_SIZE * 8).
 *
 * The limit is arbitrary (131072 clients on x86), but it is convenient to use
 * 2^n * PAGE_CACHE_SIZE * 8, the number of bits that fit an order-n
 * allocation.  If more than 131072 clients (an order-2 allocation on x86) are
 * ever needed, this should become an array of single-page pointers that are
 * allocated on demand.
 */
#if (PAGE_CACHE_SIZE * 8) >= (128 * 1024UL)
#define LR_MAX_CLIENTS (PAGE_CACHE_SIZE * 8)
#else
#define LR_MAX_CLIENTS (128 * 1024UL)
#endif
136 
/*
 * Compatible feature bits.
 */
/** COMPAT_146: this is an OST (temporary) */
#define OBD_COMPAT_OST		0x00000002
/** COMPAT_146: this is an MDT (temporary) */
#define OBD_COMPAT_MDT		0x00000004
/** 2.0 server, interop flag to show server version is changed */
#define OBD_COMPAT_20		0x00000008

/*
 * Read-only compatible feature bits.
 */
/** MDS handles LOV_OBJID file */
#define OBD_ROCOMPAT_LOVOBJID	0x00000001

/*
 * Incompatible feature bits.
 */
/** OST handles group subdirs */
#define OBD_INCOMPAT_GROUPS	0x00000001
/** this is an OST */
#define OBD_INCOMPAT_OST	0x00000002
/** this is an MDT */
#define OBD_INCOMPAT_MDT	0x00000004
/** common last_rcvd format */
#define OBD_INCOMPAT_COMMON_LR	0x00000008
/** FID is enabled */
#define OBD_INCOMPAT_FID	0x00000010
/** Size-on-MDS is enabled */
#define OBD_INCOMPAT_SOM	0x00000020
/** filesystem using iam format to store directory entries */
#define OBD_INCOMPAT_IAM_DIR	0x00000040
/** LMA attribute contains per-inode incompatible flags */
#define OBD_INCOMPAT_LMA	0x00000080
/**
 * lmm_stripe_count has been shrunk from __u32 to __u16 and the remaining
 * 16 bits are now used to store a generation.  Once the layout starts
 * changing and the generation is bumped, old versions expecting a 32-bit
 * lmm_stripe_count will be confused by interpreting
 * stripe_count | gen << 16 as the actual stripe count.
 */
#define OBD_INCOMPAT_LMM_VER	0x00000100
/** multiple OI files for MDT */
#define OBD_INCOMPAT_MULTI_OI	0x00000200
171 
172 /* Data stored per server at the head of the last_rcvd file.  In le32 order.
173    This should be common to filter_internal.h, lustre_mds.h */
174 struct lr_server_data {
175 	__u8  lsd_uuid[40];	/* server UUID */
176 	__u64 lsd_last_transno;    /* last completed transaction ID */
177 	__u64 lsd_compat14;	/* reserved - compat with old last_rcvd */
178 	__u64 lsd_mount_count;     /* incarnation number */
179 	__u32 lsd_feature_compat;  /* compatible feature flags */
180 	__u32 lsd_feature_rocompat;/* read-only compatible feature flags */
181 	__u32 lsd_feature_incompat;/* incompatible feature flags */
182 	__u32 lsd_server_size;     /* size of server data area */
183 	__u32 lsd_client_start;    /* start of per-client data area */
184 	__u16 lsd_client_size;     /* size of per-client data area */
185 	__u16 lsd_subdir_count;    /* number of subdirectories for objects */
186 	__u64 lsd_catalog_oid;     /* recovery catalog object id */
187 	__u32 lsd_catalog_ogen;    /* recovery catalog inode generation */
188 	__u8  lsd_peeruuid[40];    /* UUID of MDS associated with this OST */
189 	__u32 lsd_osd_index;       /* index number of OST in LOV */
190 	__u32 lsd_padding1;	/* was lsd_mdt_index, unused in 2.4.0 */
191 	__u32 lsd_start_epoch;     /* VBR: start epoch from last boot */
192 	/** transaction values since lsd_trans_table_time */
193 	__u64 lsd_trans_table[LR_EXPIRE_INTERVALS];
194 	/** start point of transno table below */
195 	__u32 lsd_trans_table_time; /* time of first slot in table above */
196 	__u32 lsd_expire_intervals; /* LR_EXPIRE_INTERVALS */
197 	__u8  lsd_padding[LR_SERVER_SIZE - 288];
198 };
199 
200 /* Data stored per client in the last_rcvd file.  In le32 order. */
201 struct lsd_client_data {
202 	__u8  lcd_uuid[40];      /* client UUID */
203 	__u64 lcd_last_transno; /* last completed transaction ID */
204 	__u64 lcd_last_xid;     /* xid for the last transaction */
205 	__u32 lcd_last_result;  /* result from last RPC */
206 	__u32 lcd_last_data;    /* per-op data (disposition for open &c.) */
207 	/* for MDS_CLOSE requests */
208 	__u64 lcd_last_close_transno; /* last completed transaction ID */
209 	__u64 lcd_last_close_xid;     /* xid for the last transaction */
210 	__u32 lcd_last_close_result;  /* result from last RPC */
211 	__u32 lcd_last_close_data;    /* per-op data */
212 	/* VBR: last versions */
213 	__u64 lcd_pre_versions[4];
214 	__u32 lcd_last_epoch;
215 	/** orphans handling for delayed export rely on that */
216 	__u32 lcd_first_epoch;
217 	__u8  lcd_padding[LR_CLIENT_SIZE - 128];
218 };
219 
220 /* bug20354: the lcd_uuid for export of clients may be wrong */
check_lcd(char * obd_name,int index,struct lsd_client_data * lcd)221 static inline void check_lcd(char *obd_name, int index,
222 			     struct lsd_client_data *lcd)
223 {
224 	int length = sizeof(lcd->lcd_uuid);
225 
226 	if (strnlen((char *)lcd->lcd_uuid, length) == length) {
227 		lcd->lcd_uuid[length - 1] = '\0';
228 
229 		LCONSOLE_ERROR("the client UUID (%s) on %s for exports stored in last_rcvd(index = %d) is bad!\n",
230 			       lcd->lcd_uuid, obd_name, index);
231 	}
232 }
233 
234 /* last_rcvd handling */
lsd_le_to_cpu(struct lr_server_data * buf,struct lr_server_data * lsd)235 static inline void lsd_le_to_cpu(struct lr_server_data *buf,
236 				 struct lr_server_data *lsd)
237 {
238 	int i;
239 
240 	memcpy(lsd->lsd_uuid, buf->lsd_uuid, sizeof(lsd->lsd_uuid));
241 	lsd->lsd_last_transno     = le64_to_cpu(buf->lsd_last_transno);
242 	lsd->lsd_compat14	 = le64_to_cpu(buf->lsd_compat14);
243 	lsd->lsd_mount_count      = le64_to_cpu(buf->lsd_mount_count);
244 	lsd->lsd_feature_compat   = le32_to_cpu(buf->lsd_feature_compat);
245 	lsd->lsd_feature_rocompat = le32_to_cpu(buf->lsd_feature_rocompat);
246 	lsd->lsd_feature_incompat = le32_to_cpu(buf->lsd_feature_incompat);
247 	lsd->lsd_server_size      = le32_to_cpu(buf->lsd_server_size);
248 	lsd->lsd_client_start     = le32_to_cpu(buf->lsd_client_start);
249 	lsd->lsd_client_size      = le16_to_cpu(buf->lsd_client_size);
250 	lsd->lsd_subdir_count     = le16_to_cpu(buf->lsd_subdir_count);
251 	lsd->lsd_catalog_oid      = le64_to_cpu(buf->lsd_catalog_oid);
252 	lsd->lsd_catalog_ogen     = le32_to_cpu(buf->lsd_catalog_ogen);
253 	memcpy(lsd->lsd_peeruuid, buf->lsd_peeruuid, sizeof(lsd->lsd_peeruuid));
254 	lsd->lsd_osd_index	= le32_to_cpu(buf->lsd_osd_index);
255 	lsd->lsd_padding1	= le32_to_cpu(buf->lsd_padding1);
256 	lsd->lsd_start_epoch      = le32_to_cpu(buf->lsd_start_epoch);
257 	for (i = 0; i < LR_EXPIRE_INTERVALS; i++)
258 		lsd->lsd_trans_table[i] = le64_to_cpu(buf->lsd_trans_table[i]);
259 	lsd->lsd_trans_table_time = le32_to_cpu(buf->lsd_trans_table_time);
260 	lsd->lsd_expire_intervals = le32_to_cpu(buf->lsd_expire_intervals);
261 }
262 
lsd_cpu_to_le(struct lr_server_data * lsd,struct lr_server_data * buf)263 static inline void lsd_cpu_to_le(struct lr_server_data *lsd,
264 				 struct lr_server_data *buf)
265 {
266 	int i;
267 
268 	memcpy(buf->lsd_uuid, lsd->lsd_uuid, sizeof(buf->lsd_uuid));
269 	buf->lsd_last_transno     = cpu_to_le64(lsd->lsd_last_transno);
270 	buf->lsd_compat14	 = cpu_to_le64(lsd->lsd_compat14);
271 	buf->lsd_mount_count      = cpu_to_le64(lsd->lsd_mount_count);
272 	buf->lsd_feature_compat   = cpu_to_le32(lsd->lsd_feature_compat);
273 	buf->lsd_feature_rocompat = cpu_to_le32(lsd->lsd_feature_rocompat);
274 	buf->lsd_feature_incompat = cpu_to_le32(lsd->lsd_feature_incompat);
275 	buf->lsd_server_size      = cpu_to_le32(lsd->lsd_server_size);
276 	buf->lsd_client_start     = cpu_to_le32(lsd->lsd_client_start);
277 	buf->lsd_client_size      = cpu_to_le16(lsd->lsd_client_size);
278 	buf->lsd_subdir_count     = cpu_to_le16(lsd->lsd_subdir_count);
279 	buf->lsd_catalog_oid      = cpu_to_le64(lsd->lsd_catalog_oid);
280 	buf->lsd_catalog_ogen     = cpu_to_le32(lsd->lsd_catalog_ogen);
281 	memcpy(buf->lsd_peeruuid, lsd->lsd_peeruuid, sizeof(buf->lsd_peeruuid));
282 	buf->lsd_osd_index	  = cpu_to_le32(lsd->lsd_osd_index);
283 	buf->lsd_padding1	  = cpu_to_le32(lsd->lsd_padding1);
284 	buf->lsd_start_epoch      = cpu_to_le32(lsd->lsd_start_epoch);
285 	for (i = 0; i < LR_EXPIRE_INTERVALS; i++)
286 		buf->lsd_trans_table[i] = cpu_to_le64(lsd->lsd_trans_table[i]);
287 	buf->lsd_trans_table_time = cpu_to_le32(lsd->lsd_trans_table_time);
288 	buf->lsd_expire_intervals = cpu_to_le32(lsd->lsd_expire_intervals);
289 }
290 
lcd_le_to_cpu(struct lsd_client_data * buf,struct lsd_client_data * lcd)291 static inline void lcd_le_to_cpu(struct lsd_client_data *buf,
292 				 struct lsd_client_data *lcd)
293 {
294 	memcpy(lcd->lcd_uuid, buf->lcd_uuid, sizeof (lcd->lcd_uuid));
295 	lcd->lcd_last_transno       = le64_to_cpu(buf->lcd_last_transno);
296 	lcd->lcd_last_xid	   = le64_to_cpu(buf->lcd_last_xid);
297 	lcd->lcd_last_result	= le32_to_cpu(buf->lcd_last_result);
298 	lcd->lcd_last_data	  = le32_to_cpu(buf->lcd_last_data);
299 	lcd->lcd_last_close_transno = le64_to_cpu(buf->lcd_last_close_transno);
300 	lcd->lcd_last_close_xid     = le64_to_cpu(buf->lcd_last_close_xid);
301 	lcd->lcd_last_close_result  = le32_to_cpu(buf->lcd_last_close_result);
302 	lcd->lcd_last_close_data    = le32_to_cpu(buf->lcd_last_close_data);
303 	lcd->lcd_pre_versions[0]    = le64_to_cpu(buf->lcd_pre_versions[0]);
304 	lcd->lcd_pre_versions[1]    = le64_to_cpu(buf->lcd_pre_versions[1]);
305 	lcd->lcd_pre_versions[2]    = le64_to_cpu(buf->lcd_pre_versions[2]);
306 	lcd->lcd_pre_versions[3]    = le64_to_cpu(buf->lcd_pre_versions[3]);
307 	lcd->lcd_last_epoch	 = le32_to_cpu(buf->lcd_last_epoch);
308 	lcd->lcd_first_epoch	= le32_to_cpu(buf->lcd_first_epoch);
309 }
310 
lcd_cpu_to_le(struct lsd_client_data * lcd,struct lsd_client_data * buf)311 static inline void lcd_cpu_to_le(struct lsd_client_data *lcd,
312 				 struct lsd_client_data *buf)
313 {
314 	memcpy(buf->lcd_uuid, lcd->lcd_uuid, sizeof (lcd->lcd_uuid));
315 	buf->lcd_last_transno       = cpu_to_le64(lcd->lcd_last_transno);
316 	buf->lcd_last_xid	   = cpu_to_le64(lcd->lcd_last_xid);
317 	buf->lcd_last_result	= cpu_to_le32(lcd->lcd_last_result);
318 	buf->lcd_last_data	  = cpu_to_le32(lcd->lcd_last_data);
319 	buf->lcd_last_close_transno = cpu_to_le64(lcd->lcd_last_close_transno);
320 	buf->lcd_last_close_xid     = cpu_to_le64(lcd->lcd_last_close_xid);
321 	buf->lcd_last_close_result  = cpu_to_le32(lcd->lcd_last_close_result);
322 	buf->lcd_last_close_data    = cpu_to_le32(lcd->lcd_last_close_data);
323 	buf->lcd_pre_versions[0]    = cpu_to_le64(lcd->lcd_pre_versions[0]);
324 	buf->lcd_pre_versions[1]    = cpu_to_le64(lcd->lcd_pre_versions[1]);
325 	buf->lcd_pre_versions[2]    = cpu_to_le64(lcd->lcd_pre_versions[2]);
326 	buf->lcd_pre_versions[3]    = cpu_to_le64(lcd->lcd_pre_versions[3]);
327 	buf->lcd_last_epoch	 = cpu_to_le32(lcd->lcd_last_epoch);
328 	buf->lcd_first_epoch	= cpu_to_le32(lcd->lcd_first_epoch);
329 }
330 
lcd_last_transno(struct lsd_client_data * lcd)331 static inline __u64 lcd_last_transno(struct lsd_client_data *lcd)
332 {
333 	return (lcd->lcd_last_transno > lcd->lcd_last_close_transno ?
334 		lcd->lcd_last_transno : lcd->lcd_last_close_transno);
335 }
336 
lcd_last_xid(struct lsd_client_data * lcd)337 static inline __u64 lcd_last_xid(struct lsd_client_data *lcd)
338 {
339 	return (lcd->lcd_last_xid > lcd->lcd_last_close_xid ?
340 		lcd->lcd_last_xid : lcd->lcd_last_close_xid);
341 }
342 
343 /****************** superblock additional info *********************/
344 
345 struct ll_sb_info;
346 
347 struct lustre_sb_info {
348 	int		       lsi_flags;
349 	struct obd_device	*lsi_mgc;     /* mgc obd */
350 	struct lustre_mount_data *lsi_lmd;     /* mount command info */
351 	struct ll_sb_info	*lsi_llsbi;   /* add'l client sbi info */
352 	struct dt_device	 *lsi_dt_dev;  /* dt device to access disk fs*/
353 	struct vfsmount	  *lsi_srv_mnt; /* the one server mount */
354 	atomic_t	      lsi_mounts;  /* references to the srv_mnt */
355 	char			  lsi_svname[MTI_NAME_MAXLEN];
356 	char			  lsi_osd_obdname[64];
357 	char			  lsi_osd_uuid[64];
358 	struct obd_export	 *lsi_osd_exp;
359 	char			  lsi_osd_type[16];
360 	char			  lsi_fstype[16];
361 	struct backing_dev_info   lsi_bdi;     /* each client mountpoint needs
362 						  own backing_dev_info */
363 };
364 
/* NOTE(review): presumably bits for lustre_sb_info::lsi_flags -- confirm */
#define LSI_UMOUNT_FAILOVER	0x00200000
#define LSI_BDI_INITIALIZED	0x00400000

/* sb->s_fs_info viewed as a struct lustre_sb_info pointer */
#define s2lsi(sb)		((struct lustre_sb_info *)((sb)->s_fs_info))
/* the same field, without the cast */
#define s2lsi_nocast(sb)	((sb)->s_fs_info)

/* shortcuts to mount data fields, reached through s2lsi(sb)->lsi_lmd */
#define get_profile_name(sb)	(s2lsi(sb)->lsi_lmd->lmd_profile)
#define get_mount_flags(sb)	(s2lsi(sb)->lsi_lmd->lmd_flags)
#define get_mntdev_name(sb)	(s2lsi(sb)->lsi_lmd->lmd_dev)
374 
375 /****************** mount lookup info *********************/
376 
377 struct lustre_mount_info {
378 	char		 *lmi_name;
379 	struct super_block   *lmi_sb;
380 	struct vfsmount      *lmi_mnt;
381 	struct list_head	    lmi_list_chain;
382 };
383 
384 /****************** prototypes *********************/
385 
386 /* obd_mount.c */
387 
/*
 * Forward declarations: the prototypes below only need pointers to these
 * types.  Declaring the tags at file scope keeps a tag from being introduced
 * inside a parameter list, where it would have prototype scope only and be
 * incompatible with the real type, when the defining header has not been
 * included before this one.
 */
struct super_block;
struct vfsmount;
struct ldlm_res_id;

int lustre_start_mgc(struct super_block *sb);

/*
 * Register the callback \a cfs, which takes a super block and a vfsmount
 * and returns int.
 * NOTE(review): by its name this is the client fill_super hook -- confirm
 * in obd_mount.c.
 */
void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
						  struct vfsmount *mnt));

/*
 * Register the callback \a cfs, which takes a super block and returns
 * nothing.
 * NOTE(review): by its name this is the kill_super hook -- confirm in
 * obd_mount.c.
 */
void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb));

int lustre_common_put_super(struct super_block *sb);

int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
395 
396 /** @} disk */
397 
398 #endif /* _LUSTRE_DISK_H */
399