1/*
2 *  linux/fs/hfsplus/unicode.c
3 *
4 * Copyright (C) 2001
5 * Brad Boyer (flar@allandria.com)
6 * (C) 2003 Ardis Technologies <roman@ardistech.com>
7 *
8 * Handler routines for unicode strings
9 */
10
11#include <linux/types.h>
12#include <linux/nls.h>
13#include "hfsplus_fs.h"
14#include "hfsplus_raw.h"
15
16/* Fold the case of a unicode char, given the 16 bit value */
17/* Returns folded char, or 0 if ignorable */
18static inline u16 case_fold(u16 c)
19{
20	u16 tmp;
21
22	tmp = hfsplus_case_fold_table[c >> 8];
23	if (tmp)
24		tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
25	else
26		tmp = c;
27	return tmp;
28}
29
30/* Compare unicode strings, return values like normal strcmp */
31int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
32		       const struct hfsplus_unistr *s2)
33{
34	u16 len1, len2, c1, c2;
35	const hfsplus_unichr *p1, *p2;
36
37	len1 = be16_to_cpu(s1->length);
38	len2 = be16_to_cpu(s2->length);
39	p1 = s1->unicode;
40	p2 = s2->unicode;
41
42	while (1) {
43		c1 = c2 = 0;
44
45		while (len1 && !c1) {
46			c1 = case_fold(be16_to_cpu(*p1));
47			p1++;
48			len1--;
49		}
50		while (len2 && !c2) {
51			c2 = case_fold(be16_to_cpu(*p2));
52			p2++;
53			len2--;
54		}
55
56		if (c1 != c2)
57			return (c1 < c2) ? -1 : 1;
58		if (!c1 && !c2)
59			return 0;
60	}
61}
62
63/* Compare names as a sequence of 16-bit unsigned integers */
64int hfsplus_strcmp(const struct hfsplus_unistr *s1,
65		   const struct hfsplus_unistr *s2)
66{
67	u16 len1, len2, c1, c2;
68	const hfsplus_unichr *p1, *p2;
69	int len;
70
71	len1 = be16_to_cpu(s1->length);
72	len2 = be16_to_cpu(s2->length);
73	p1 = s1->unicode;
74	p2 = s2->unicode;
75
76	for (len = min(len1, len2); len > 0; len--) {
77		c1 = be16_to_cpu(*p1);
78		c2 = be16_to_cpu(*p2);
79		if (c1 != c2)
80			return c1 < c2 ? -1 : 1;
81		p1++;
82		p2++;
83	}
84
85	return len1 < len2 ? -1 :
86	       len1 > len2 ? 1 : 0;
87}
88
89
/*
 * Constants for the arithmetic composition of Hangul syllables from
 * conjoining jamo (leading consonant L, vowel V, trailing consonant T).
 */
#define Hangul_SBase	0xac00	/* first precomposed syllable */
#define Hangul_LBase	0x1100	/* first leading consonant */
#define Hangul_VBase	0x1161	/* first vowel */
#define Hangul_TBase	0x11a7	/* one below the first trailing consonant */
#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28	/* index 0 stands for "no trailing consonant" */
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
#define Hangul_SCount	(Hangul_LCount * Hangul_NCount)
99
100
101static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
102{
103	int i, s, e;
104
105	s = 1;
106	e = p[1];
107	if (!e || cc < p[s * 2] || cc > p[e * 2])
108		return NULL;
109	do {
110		i = (s + e) / 2;
111		if (cc > p[i * 2])
112			s = i + 1;
113		else if (cc < p[i * 2])
114			e = i - 1;
115		else
116			return hfsplus_compose_table + p[i * 2 + 1];
117	} while (s <= e);
118	return NULL;
119}
120
121int hfsplus_uni2asc(struct super_block *sb,
122		const struct hfsplus_unistr *ustr,
123		char *astr, int *len_p)
124{
125	const hfsplus_unichr *ip;
126	struct nls_table *nls = HFSPLUS_SB(sb)->nls;
127	u8 *op;
128	u16 cc, c0, c1;
129	u16 *ce1, *ce2;
130	int i, len, ustrlen, res, compose;
131
132	op = astr;
133	ip = ustr->unicode;
134	ustrlen = be16_to_cpu(ustr->length);
135	len = *len_p;
136	ce1 = NULL;
137	compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
138
139	while (ustrlen > 0) {
140		c0 = be16_to_cpu(*ip++);
141		ustrlen--;
142		/* search for single decomposed char */
143		if (likely(compose))
144			ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
145		if (ce1)
146			cc = ce1[0];
147		else
148			cc = 0;
149		if (cc) {
150			/* start of a possibly decomposed Hangul char */
151			if (cc != 0xffff)
152				goto done;
153			if (!ustrlen)
154				goto same;
155			c1 = be16_to_cpu(*ip) - Hangul_VBase;
156			if (c1 < Hangul_VCount) {
157				/* compose the Hangul char */
158				cc = (c0 - Hangul_LBase) * Hangul_VCount;
159				cc = (cc + c1) * Hangul_TCount;
160				cc += Hangul_SBase;
161				ip++;
162				ustrlen--;
163				if (!ustrlen)
164					goto done;
165				c1 = be16_to_cpu(*ip) - Hangul_TBase;
166				if (c1 > 0 && c1 < Hangul_TCount) {
167					cc += c1;
168					ip++;
169					ustrlen--;
170				}
171				goto done;
172			}
173		}
174		while (1) {
175			/* main loop for common case of not composed chars */
176			if (!ustrlen)
177				goto same;
178			c1 = be16_to_cpu(*ip);
179			if (likely(compose))
180				ce1 = hfsplus_compose_lookup(
181					hfsplus_compose_table, c1);
182			if (ce1)
183				break;
184			switch (c0) {
185			case 0:
186				c0 = 0x2400;
187				break;
188			case '/':
189				c0 = ':';
190				break;
191			}
192			res = nls->uni2char(c0, op, len);
193			if (res < 0) {
194				if (res == -ENAMETOOLONG)
195					goto out;
196				*op = '?';
197				res = 1;
198			}
199			op += res;
200			len -= res;
201			c0 = c1;
202			ip++;
203			ustrlen--;
204		}
205		ce2 = hfsplus_compose_lookup(ce1, c0);
206		if (ce2) {
207			i = 1;
208			while (i < ustrlen) {
209				ce1 = hfsplus_compose_lookup(ce2,
210					be16_to_cpu(ip[i]));
211				if (!ce1)
212					break;
213				i++;
214				ce2 = ce1;
215			}
216			cc = ce2[0];
217			if (cc) {
218				ip += i;
219				ustrlen -= i;
220				goto done;
221			}
222		}
223same:
224		switch (c0) {
225		case 0:
226			cc = 0x2400;
227			break;
228		case '/':
229			cc = ':';
230			break;
231		default:
232			cc = c0;
233		}
234done:
235		res = nls->uni2char(cc, op, len);
236		if (res < 0) {
237			if (res == -ENAMETOOLONG)
238				goto out;
239			*op = '?';
240			res = 1;
241		}
242		op += res;
243		len -= res;
244	}
245	res = 0;
246out:
247	*len_p = (char *)op - astr;
248	return res;
249}
250
251/*
252 * Convert one or more ASCII characters into a single unicode character.
253 * Returns the number of ASCII characters corresponding to the unicode char.
254 */
255static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
256			      wchar_t *uc)
257{
258	int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
259	if (size <= 0) {
260		*uc = '?';
261		size = 1;
262	}
263	switch (*uc) {
264	case 0x2400:
265		*uc = 0;
266		break;
267	case ':':
268		*uc = '/';
269		break;
270	}
271	return size;
272}
273
274/* Decomposes a single unicode character. */
275static inline u16 *decompose_unichar(wchar_t uc, int *size)
276{
277	int off;
278
279	off = hfsplus_decompose_table[(uc >> 12) & 0xf];
280	if (off == 0 || off == 0xffff)
281		return NULL;
282
283	off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
284	if (!off)
285		return NULL;
286
287	off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
288	if (!off)
289		return NULL;
290
291	off = hfsplus_decompose_table[off + (uc & 0xf)];
292	*size = off & 3;
293	if (*size == 0)
294		return NULL;
295	return hfsplus_decompose_table + (off / 4);
296}
297
298int hfsplus_asc2uni(struct super_block *sb,
299		    struct hfsplus_unistr *ustr, int max_unistr_len,
300		    const char *astr, int len)
301{
302	int size, dsize, decompose;
303	u16 *dstr, outlen = 0;
304	wchar_t c;
305
306	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
307	while (outlen < max_unistr_len && len > 0) {
308		size = asc2unichar(sb, astr, len, &c);
309
310		if (decompose)
311			dstr = decompose_unichar(c, &dsize);
312		else
313			dstr = NULL;
314		if (dstr) {
315			if (outlen + dsize > max_unistr_len)
316				break;
317			do {
318				ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
319			} while (--dsize > 0);
320		} else
321			ustr->unicode[outlen++] = cpu_to_be16(c);
322
323		astr += size;
324		len -= size;
325	}
326	ustr->length = cpu_to_be16(outlen);
327	if (len > 0)
328		return -ENAMETOOLONG;
329	return 0;
330}
331
332/*
333 * Hash a string to an integer as appropriate for the HFS+ filesystem.
334 * Composed unicode characters are decomposed and case-folding is performed
335 * if the appropriate bits are (un)set on the superblock.
336 */
337int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
338{
339	struct super_block *sb = dentry->d_sb;
340	const char *astr;
341	const u16 *dstr;
342	int casefold, decompose, size, len;
343	unsigned long hash;
344	wchar_t c;
345	u16 c2;
346
347	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
348	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
349	hash = init_name_hash();
350	astr = str->name;
351	len = str->len;
352	while (len > 0) {
353		int uninitialized_var(dsize);
354		size = asc2unichar(sb, astr, len, &c);
355		astr += size;
356		len -= size;
357
358		if (decompose)
359			dstr = decompose_unichar(c, &dsize);
360		else
361			dstr = NULL;
362		if (dstr) {
363			do {
364				c2 = *dstr++;
365				if (casefold)
366					c2 = case_fold(c2);
367				if (!casefold || c2)
368					hash = partial_name_hash(c2, hash);
369			} while (--dsize > 0);
370		} else {
371			c2 = c;
372			if (casefold)
373				c2 = case_fold(c2);
374			if (!casefold || c2)
375				hash = partial_name_hash(c2, hash);
376		}
377	}
378	str->hash = end_name_hash(hash);
379
380	return 0;
381}
382
383/*
384 * Compare strings with HFS+ filename ordering.
385 * Composed unicode characters are decomposed and case-folding is performed
386 * if the appropriate bits are (un)set on the superblock.
387 */
388int hfsplus_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
389		unsigned int len, const char *str, const struct qstr *name)
390{
391	struct super_block *sb = parent->d_sb;
392	int casefold, decompose, size;
393	int dsize1, dsize2, len1, len2;
394	const u16 *dstr1, *dstr2;
395	const char *astr1, *astr2;
396	u16 c1, c2;
397	wchar_t c;
398
399	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
400	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
401	astr1 = str;
402	len1 = len;
403	astr2 = name->name;
404	len2 = name->len;
405	dsize1 = dsize2 = 0;
406	dstr1 = dstr2 = NULL;
407
408	while (len1 > 0 && len2 > 0) {
409		if (!dsize1) {
410			size = asc2unichar(sb, astr1, len1, &c);
411			astr1 += size;
412			len1 -= size;
413
414			if (decompose)
415				dstr1 = decompose_unichar(c, &dsize1);
416			if (!decompose || !dstr1) {
417				c1 = c;
418				dstr1 = &c1;
419				dsize1 = 1;
420			}
421		}
422
423		if (!dsize2) {
424			size = asc2unichar(sb, astr2, len2, &c);
425			astr2 += size;
426			len2 -= size;
427
428			if (decompose)
429				dstr2 = decompose_unichar(c, &dsize2);
430			if (!decompose || !dstr2) {
431				c2 = c;
432				dstr2 = &c2;
433				dsize2 = 1;
434			}
435		}
436
437		c1 = *dstr1;
438		c2 = *dstr2;
439		if (casefold) {
440			c1 = case_fold(c1);
441			if (!c1) {
442				dstr1++;
443				dsize1--;
444				continue;
445			}
446			c2 = case_fold(c2);
447			if (!c2) {
448				dstr2++;
449				dsize2--;
450				continue;
451			}
452		}
453		if (c1 < c2)
454			return -1;
455		else if (c1 > c2)
456			return 1;
457
458		dstr1++;
459		dsize1--;
460		dstr2++;
461		dsize2--;
462	}
463
464	if (len1 < len2)
465		return -1;
466	if (len1 > len2)
467		return 1;
468	return 0;
469}
470