/*
 *	Routines to identify caches on Intel CPU.
 *
 *	Changes:
 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/pci.h>

#include <asm/processor.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)

/*
 * All the cache descriptor types we care about (no TLB or
 * trace cache entries)
 */

static const struct _cache_table cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2-way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};
enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};

struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	struct amd_northbridge *nb;
};

static unsigned short num_cache_leaves;

/*
 * AMD doesn't have CPUID4. Emulate it here to report the same
 * information to the user. This makes some assumptions about the machine:
 * L2 not shared, no SMT etc., which is currently true on AMD CPUs.
 *
 * In theory the TLBs could be reported as fake type (they are in "dummy").
 * Maybe later.
 */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

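/*
 * Map amd_cpuid4()'s leaf argument (0: L1d, 1: L1i, 2: L2, 3: L3)
 * to the CPUID4 cache level and type encodings.
 */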
static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
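		/* fall through */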
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;

	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)

/*
 * L3 cache descriptors
 */
static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
	struct amd_l3_cache *l3 = &nb->l3_cache;
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));

	if (boot_cpu_data.x86 == 0x15) {
		l3->subcaches[0] = sc0 += !(val & BIT(1));
		l3->subcaches[1] = sc1 += !(val & BIT(5));
	}

	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @nb:   northbridge descriptor containing the L3 cache
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}

static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;
	struct amd_northbridge *nb = this_leaf->priv;

	index = amd_get_l3_disable_slot(nb, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

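/*
 * disable an L3 cache index by programming the disable-slot register
 * @nb:   northbridge descriptor containing the L3 cache
 * @cpu:  a CPU on the node containing the L3 cache
 * @slot: slot number (0..1)
 * @idx:  L3 index to disable
 */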
static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/* disable index in all 4 subcaches */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!nb->l3_cache.subcaches[i])
			continue;

		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache whose indices we disable, therefore a simple wbinvd()
		 * is not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
	}
}

/*
 * disable an L3 cache index by using a disable-slot
 *
 * @nb:    northbridge descriptor containing the L3 cache
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
			    unsigned slot, unsigned long index)
{
	int ret = 0;

	/* check if @slot is already used or the index is already disabled */
	ret = amd_get_l3_disable_slot(nb, slot);
	if (ret >= 0)
		return -EEXIST;

	if (index > nb->l3_cache.indices)
		return -EINVAL;

	/* check whether the other slot has disabled the same index already */
	if (index == amd_get_l3_disable_slot(nb, !slot))
		return -EEXIST;

	amd_l3_disable_index(nb, cpu, slot, index);

	return 0;
}

static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;
	struct amd_northbridge *nb = this_leaf->priv;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	cpu = cpumask_first(&this_leaf->shared_cpu_map);

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			pr_warning("L3 slot %d in use/index already disabled!\n",
				   slot);
		return err;
	}
	return count;
}

#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_store(struct device *dev,			\
			     struct device_attribute *attr,		\
			     const char *buf, size_t count)		\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static ssize_t subcaches_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

static ssize_t subcaches_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (kstrtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;

	return count;
}

static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);

static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
			       struct attribute *attr, int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	umode_t mode = attr->mode;

	if (!this_leaf->priv)
		return 0;

	if ((attr == &dev_attr_subcaches.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return mode;

	if ((attr == &dev_attr_cache_disable_0.attr ||
	     attr == &dev_attr_cache_disable_1.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return mode;

	return 0;
}

static struct attribute_group cache_private_group = {
	.is_visible = cache_private_attrs_is_visible,
};

static void init_amd_l3_attrs(void)
{
	int n = 1;
	static struct attribute **amd_l3_attrs;

	if (amd_l3_attrs) /* already initialized */
		return;

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
	if (!amd_l3_attrs)
		return;

	n = 0;
	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
	}
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;

	cache_private_group.attrs = amd_l3_attrs;
}

const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
	struct amd_northbridge *nb = this_leaf->priv;

	if (this_leaf->level < 3 || !nb)
		return NULL;

	if (nb->l3_cache.indices)
		init_amd_l3_attrs();

	return &cache_private_group;
}

static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3)
		return;

	node = amd_get_nb_id(smp_processor_id());
	this_leaf->nb = node_to_amd_nb(node);
	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
		amd_calc_l3_indices(this_leaf->nb);
}
#else
#define amd_init_l3_cache(x, y)
#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */

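/*
 * Fill in a _cpuid4_info_regs from CPUID leaf 4 (Intel), leaf 0x8000001d
 * (AMD with TOPOEXT) or the CPUID4 emulation above (other AMD).  The total
 * size is sets * line size * physical line partitions * ways.
 */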
static int
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax	eax;
	union _cpuid4_leaf_ebx	ebx;
	union _cpuid4_leaf_ecx	ecx;
	unsigned		edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		if (cpu_has_topoext)
			cpuid_count(0x8000001d, index, &eax.full,
				    &ebx.full, &ecx.full, &edx);
		else
			amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CTYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets          + 1) *
			  (ebx.split.coherency_line_size     + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity   + 1);
	return 0;
}

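/*
 * Count the cache leaves by iterating CPUID leaf 4 (or 0x8000001d on AMD)
 * until a leaf of type CTYPE_NULL is returned.
 */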
static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int		eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax	cache_eax;
	int			i = -1;

	if (c->x86_vendor == X86_VENDOR_AMD)
		op = 0x8000001d;
	else
		op = 4;

	do {
		++i;
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	if (cpu_has_topoext) {
		num_cache_leaves = find_num_cache_leaves(c);
	} else if (c->extended_cpuid_level >= 0x80000006) {
		if (cpuid_edx(0x80000006) & 0xf000)
			num_cache_leaves = 4;
		else
			num_cache_leaves = 3;
	}
}

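/*
 * Determine the cache sizes for this CPU, preferring the deterministic
 * cache parameters in cpuid(4) and falling back to the cpuid(2)
 * descriptor table.  Sets c->x86_cache_size and (on SMP) cpu_llc_id;
 * returns the L2 size in KB.
 */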
unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_SMP
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves(c);
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), the deterministic cache
		 * parameters leaf, to find the cache details.
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf = {};
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval < 0)
				continue;

			switch (this_leaf.eax.split.level) {
			case 1:
				if (this_leaf.eax.split.type == CTYPE_DATA)
					new_l1d = this_leaf.size/1024;
				else if (this_leaf.eax.split.type == CTYPE_INST)
					new_l1i = this_leaf.size/1024;
				break;
			case 2:
				new_l2 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			case 3:
				new_l3 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l3_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			default:
				break;
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * the trace cache.
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2 call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

#ifdef CONFIG_SMP
	/*
	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
	 * turn means that the only possibility is SMT (as indicated in
	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
	 * c->phys_proc_id.
	 */
	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
#endif

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	return l2;
}

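/*
 * AMD-specific sharing map setup: on TOPOEXT parts derive the siblings
 * sharing a leaf from num_threads_sharing, otherwise only handle the
 * node-wide L3 (index 3).  Returns 1 if the map was set up here.
 */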
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf;
	int i, sibling;

	if (cpu_has_topoext) {
		unsigned int apicid, nshared, first, last;

		this_leaf = this_cpu_ci->info_list + index;
		nshared = base->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			this_leaf = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;
			this_leaf = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

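/*
 * Set up shared_cpu_map for one cache leaf: CPUs share the leaf when
 * their APIC IDs match above the num_threads_sharing bits.
 */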
static void __cache_cpumap_setup(unsigned int cpu, int index,
				 struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if (c->x86_vendor == X86_VENDOR_AMD) {
		if (__cache_amd_cpumap_setup(cpu, index, base))
			return;
	}

	this_leaf = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

	for_each_online_cpu(i)
		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			if (i == cpu || !sib_cpu_ci->info_list)
				continue; /* skip if itself or no cacheinfo */
			sibling_leaf = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
		}
}

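/* translate one _cpuid4_info_regs into the generic struct cacheinfo leaf */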
static void ci_leaf_init(struct cacheinfo *this_leaf,
			 struct _cpuid4_info_regs *base)
{
	this_leaf->level = base->eax.split.level;
	this_leaf->type = cache_type_map[base->eax.split.type];
	this_leaf->coherency_line_size =
				base->ebx.split.coherency_line_size + 1;
	this_leaf->ways_of_associativity =
				base->ebx.split.ways_of_associativity + 1;
	this_leaf->size = base->size;
	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
	this_leaf->physical_line_partition =
				base->ebx.split.physical_line_partition + 1;
	this_leaf->priv = base->nb;
}

static int __init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

	if (!num_cache_leaves)
		return -ENOENT;
	if (!this_cpu_ci)
		return -EINVAL;
	this_cpu_ci->num_levels = 3;
	this_cpu_ci->num_leaves = num_cache_leaves;
	return 0;
}

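/* fill the cacheinfo leaves and their sharing maps for @cpu */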
static int __populate_cache_leaves(unsigned int cpu)
{
	unsigned int idx;
	int ret;
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
	struct _cpuid4_info_regs id4_regs = {};

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
		if (ret)
			return ret;
		ci_leaf_init(this_leaf++, &id4_regs);
		__cache_cpumap_setup(cpu, idx, &id4_regs);
	}
	return 0;
}

DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)