/*
 * arch/metag/mm/cache.c
 *
 * Copyright (C) 2001, 2002, 2005, 2007, 2012 Imagination Technologies.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License version 2 as published by the
 * Free Software Foundation.
 *
 * Cache control code
 */

#include <linux/export.h>
#include <linux/io.h>
#include <asm/cacheflush.h>
#include <asm/core_reg.h>
#include <asm/global_lock.h>
#include <asm/metag_isa.h>
#include <asm/metag_mem.h>
#include <asm/metag_regs.h>

#define DEFAULT_CACHE_WAYS_LOG2	2

/*
 * Size of a set in the caches. Initialised for default 16K stride, adjusted
 * according to values passed through TBI global heap segment via LDLK (on ATP)
 * or config registers (on HTP/MTP)
 */
static int dcache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
					- DEFAULT_CACHE_WAYS_LOG2;
static int icache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
					- DEFAULT_CACHE_WAYS_LOG2;
/*
 * The number of sets in the caches. Initialised for HTP/ATP, adjusted
 * according to NOMMU setting in config registers
 */
static unsigned char dcache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
static unsigned char icache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
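/*
 * Worked example with the defaults above (a sketch, assuming the 16K stride
 * means METAG_TBI_CACHE_SIZE_BASE_LOG2 == log2(16K) == 14): set_shift is
 * 14 - 2 = 12, so each of the 1 << 2 = 4 sets spans 4K and a full flush
 * walks four 4K regions in cache-line sized steps.
 */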

#ifndef CONFIG_METAG_META12
/**
 * metag_lnkget_probe() - Probe whether lnkget/lnkset go around the cache
 */
static volatile u32 lnkget_testdata[16] __initdata __aligned(64);

#define LNKGET_CONSTANT 0xdeadbeef

static void __init metag_lnkget_probe(void)
{
	int temp;
	long flags;

	/*
	 * It's conceivable the user has configured a globally coherent cache
	 * shared with non-Linux hardware threads, so use LOCK2 to prevent them
	 * from executing and causing cache eviction during the test.
	 */
	__global_lock2(flags);

	/* read a value to bring it into the cache */
	(void)lnkget_testdata[0];
	lnkget_testdata[0] = 0;

	/* lnkget/lnkset it to modify it */
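	/*
	 * The LNKGETD/LNKSETD pair below is retried until the status field
	 * read back from TXSTAT (masked with 0x3f000000) equals 0x02000000,
	 * taken here to be the successful-completion encoding.
	 */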
	asm volatile(
		"1:	LNKGETD %0, [%1]\n"
		"	LNKSETD [%1], %2\n"
		"	DEFR	%0, TXSTAT\n"
		"	ANDT	%0, %0, #HI(0x3f000000)\n"
		"	CMPT	%0, #HI(0x02000000)\n"
		"	BNZ	1b\n"
		: "=&d" (temp)
		: "da" (&lnkget_testdata[0]), "bd" (LNKGET_CONSTANT)
		: "cc");

	/* re-read it to see if the cached value changed */
	temp = lnkget_testdata[0];

	__global_unlock2(flags);

	/* flush the cache line to fix any incoherency */
	__builtin_dcache_flush((void *)&lnkget_testdata[0]);

#if defined(CONFIG_METAG_LNKGET_AROUND_CACHE)
	/* if the cache is right, LNKGET_AROUND_CACHE is unnecessary */
	if (temp == LNKGET_CONSTANT)
		pr_info("LNKGET/SET go through cache but CONFIG_METAG_LNKGET_AROUND_CACHE=y\n");
#elif defined(CONFIG_METAG_ATOMICITY_LNKGET)
	/*
	 * if the cache is wrong, LNKGET_AROUND_CACHE is really necessary
	 * because the kernel is configured to use LNKGET/SET for atomicity
	 */
	WARN(temp != LNKGET_CONSTANT,
	     "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
	     "Expect kernel failure as it's used for atomicity primitives\n");
#elif defined(CONFIG_SMP)
	/*
	 * if the cache is wrong, LNKGET_AROUND_CACHE should be used or the
	 * gateway page won't flush and userland could break.
	 */
	WARN(temp != LNKGET_CONSTANT,
	     "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
	     "Expect userland failure as it's used for user gateway page\n");
#else
	/*
	 * if the cache is wrong, LNKGET_AROUND_CACHE is set wrong, but it
	 * doesn't actually matter as it doesn't have any effect on !SMP &&
	 * !ATOMICITY_LNKGET.
	 */
	if (temp != LNKGET_CONSTANT)
		pr_warn("LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n");
#endif
}
#endif /* !CONFIG_METAG_META12 */

/**
 * metag_cache_probe() - Probe L1 cache configuration.
 *
 * Probe the L1 cache configuration to aid the L1 physical cache flushing
 * functions.
 */
void __init metag_cache_probe(void)
{
#ifndef CONFIG_METAG_META12
	int coreid = metag_in32(METAC_CORE_ID);
	int config = metag_in32(METAC_CORE_CONFIG2);
	int cfgcache = coreid & METAC_COREID_CFGCACHE_BITS;

	if (cfgcache == METAC_COREID_CFGCACHE_TYPE0 ||
	    cfgcache == METAC_COREID_CFGCACHE_PRIVNOMMU) {
		icache_sets_log2 = 1;
		dcache_sets_log2 = 1;
	}

	/*
	 * For normal size caches, the smallest size is 4Kb.
	 * For small caches, the smallest size is 64b.
	 */
	icache_set_shift = (config & METAC_CORECFG2_ICSMALL_BIT)
				? 6 : 12;
	icache_set_shift += (config & METAC_CORE_C2ICSZ_BITS)
				>> METAC_CORE_C2ICSZ_S;
	icache_set_shift -= icache_sets_log2;

	dcache_set_shift = (config & METAC_CORECFG2_DCSMALL_BIT)
				? 6 : 12;
	dcache_set_shift += (config & METAC_CORECFG2_DCSZ_BITS)
				>> METAC_CORECFG2_DCSZ_S;
	dcache_set_shift -= dcache_sets_log2;
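	/*
	 * Example decode (a sketch, assuming the CORECFG2 size fields encode
	 * log2(cache size / 4K) for normal sized caches): a 16K D-cache gives
	 * a size field of 2, so dcache_set_shift = 12 + 2 - dcache_sets_log2,
	 * which with the default of four sets is 12, i.e. 4K per set.
	 */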

	metag_lnkget_probe();
#else
	/* Extract cache sizes from global heap segment */
	unsigned long val, u;
	int width, shift, addend;
	PTBISEG seg;

	seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL,
					  TBID_SEGSCOPE_GLOBAL,
					  TBID_SEGTYPE_HEAP));
	if (seg != NULL) {
		val = seg->Data[1];

		/* Work out width of I-cache size bit-field */
		u = ((unsigned long) METAG_TBI_ICACHE_SIZE_BITS)
		       >> METAG_TBI_ICACHE_SIZE_S;
		width = 0;
		while (u & 1) {
			width++;
			u >>= 1;
		}
		/* Extract sign-extended size addend value */
		shift = 32 - (METAG_TBI_ICACHE_SIZE_S + width);
		addend = (long) ((val & METAG_TBI_ICACHE_SIZE_BITS)
				 << shift)
			>> (shift + METAG_TBI_ICACHE_SIZE_S);
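		/*
		 * Shifting the field up to bit 31 and arithmetic-shifting it
		 * back down sign-extends it, so a negative addend shrinks the
		 * set size below the 16K-stride default while a positive one
		 * grows it.
		 */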
		/* Now calculate I-cache set size */
		icache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
				    - DEFAULT_CACHE_WAYS_LOG2)
					+ addend;

		/* Similarly for D-cache */
		u = ((unsigned long) METAG_TBI_DCACHE_SIZE_BITS)
		       >> METAG_TBI_DCACHE_SIZE_S;
		width = 0;
		while (u & 1) {
			width++;
			u >>= 1;
		}
		shift = 32 - (METAG_TBI_DCACHE_SIZE_S + width);
		addend = (long) ((val & METAG_TBI_DCACHE_SIZE_BITS)
				 << shift)
			>> (shift + METAG_TBI_DCACHE_SIZE_S);
		dcache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
				    - DEFAULT_CACHE_WAYS_LOG2)
					+ addend;
	}
#endif
}

static void metag_phys_data_cache_flush(const void *start)
{
	unsigned long flush0, flush1, flush2, flush3;
	int loops, step;
	int thread;
	int part, offset;
	int set_shift;

	/* Use a sequence of writes to flush the cache region requested */
	thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
					  >> TXENABLE_THREAD_S;

	/* Cache is broken into sets which lie in contiguous RAMs */
	set_shift = dcache_set_shift;

	/* Move to the base of the physical cache flush region */
	flush0 = LINSYSCFLUSH_DCACHE_LINE;
	step   = 64;

	/* Get partition data for this thread */
	part = metag_in32(SYSC_DCPART0 +
			      (SYSC_xCPARTn_STRIDE * thread));

	if ((int)start < 0)
		/* Access Global vs Local partition */
		part >>= SYSC_xCPARTG_AND_S
			- SYSC_xCPARTL_AND_S;

	/* Extract offset and move SetOff */
	offset = (part & SYSC_xCPARTL_OR_BITS)
			>> SYSC_xCPARTL_OR_S;
	flush0 += (offset << (set_shift - 4));

	/* Shrink size */
	part = (part & SYSC_xCPARTL_AND_BITS)
			>> SYSC_xCPARTL_AND_S;
	loops = ((part + 1) << (set_shift - 4));
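	/*
	 * A sketch of the partition arithmetic above (assuming the xCPART
	 * OR/AND fields express the thread's share of each set in
	 * sixteenths): the OR field moves the flush pointer up by offset/16
	 * of a set and the AND field limits the walk to (part + 1)/16 of a
	 * set, hence the (set_shift - 4) shifts. A full-set partition
	 * (offset 0, AND field 15) flushes the whole 1 << set_shift bytes.
	 */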

	/* Reduce loops by step of cache line size */
	loops /= step;

	flush1 = flush0 + (1 << set_shift);
	flush2 = flush0 + (2 << set_shift);
	flush3 = flush0 + (3 << set_shift);

	if (dcache_sets_log2 == 1) {
		flush2 = flush1;
		flush3 = flush1 + step;
		flush1 = flush0 + step;
		step  <<= 1;
		loops >>= 1;
	}

	/* Clear loops ways in cache */
	while (loops-- != 0) {
		/* Clear the ways. */
#if 0
		/*
		 * GCC doesn't generate very good code for this so we
		 * provide inline assembly instead.
		 */
		metag_out8(0, flush0);
		metag_out8(0, flush1);
		metag_out8(0, flush2);
		metag_out8(0, flush3);

		flush0 += step;
		flush1 += step;
		flush2 += step;
		flush3 += step;
#else
		asm volatile (
			"SETB\t[%0+%4++],%5\n"
			"SETB\t[%1+%4++],%5\n"
			"SETB\t[%2+%4++],%5\n"
			"SETB\t[%3+%4++],%5\n"
			: "+e" (flush0),
			  "+e" (flush1),
			  "+e" (flush2),
			  "+e" (flush3)
			: "e" (step), "a" (0));
#endif
	}
}

void metag_data_cache_flush_all(const void *start)
{
	if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
		/* No need to flush the data cache; it's not actually enabled */
		return;

	metag_phys_data_cache_flush(start);
}

void metag_data_cache_flush(const void *start, int bytes)
{
	unsigned long flush0;
	int loops, step;

	if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
		/* No need to flush the data cache; it's not actually enabled */
		return;

	if (bytes >= 4096) {
		metag_phys_data_cache_flush(start);
		return;
	}

	/* Use linear cache flush mechanism on META IP */
	flush0 = (int)start;
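	/*
	 * Round up so every cache line touched by [start, start + bytes) is
	 * covered: e.g. with 64-byte lines, flushing 100 bytes that begin
	 * 8 bytes into a line gives (8 + 100 + 63) >> 6 = 2 lines.
	 */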
	loops  = ((int)start & (DCACHE_LINE_BYTES - 1)) + bytes +
					(DCACHE_LINE_BYTES - 1);
	loops  >>= DCACHE_LINE_S;

#define PRIM_FLUSH(addr, offset) do {			\
	int __addr = ((int) (addr)) + ((offset) * 64);	\
	__builtin_dcache_flush((void *)(__addr));	\
	} while (0)

#define LOOP_INC (4*64)

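	/*
	 * Unrolled flush loop: each pass flushes up to four consecutive
	 * cache lines. The default case handles four or more remaining
	 * lines and sets step so we go round again; cases 3..1 mop up the
	 * tail and fall through to 0, leaving step at 0 to stop.
	 */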
	do {
		/* By default stop */
		step = 0;

		switch (loops) {
		/* Drop Thru Cases! */
		default:
			PRIM_FLUSH(flush0, 3);
			loops -= 4;
			step = 1;
		case 3:
			PRIM_FLUSH(flush0, 2);
		case 2:
			PRIM_FLUSH(flush0, 1);
		case 1:
			PRIM_FLUSH(flush0, 0);
			flush0 += LOOP_INC;
		case 0:
			break;
		}
	} while (step);
}
EXPORT_SYMBOL(metag_data_cache_flush);

static void metag_phys_code_cache_flush(const void *start, int bytes)
{
	unsigned long flush0, flush1, flush2, flush3, end_set;
	int loops, step;
	int thread;
	int set_shift, set_size;
	int part, offset;

	/* Use a sequence of writes to flush the cache region requested */
	thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
					  >> TXENABLE_THREAD_S;
	set_shift = icache_set_shift;

	/* Move to the base of the physical cache flush region */
	flush0 = LINSYSCFLUSH_ICACHE_LINE;
	step   = 64;

	/* Get partition code for this thread */
	part = metag_in32(SYSC_ICPART0 +
			  (SYSC_xCPARTn_STRIDE * thread));

	if ((int)start < 0)
		/* Access Global vs Local partition */
		part >>= SYSC_xCPARTG_AND_S-SYSC_xCPARTL_AND_S;

	/* Extract offset and move SetOff */
	offset = (part & SYSC_xCPARTL_OR_BITS)
			>> SYSC_xCPARTL_OR_S;
	flush0 += (offset << (set_shift - 4));

	/* Shrink size */
	part = (part & SYSC_xCPARTL_AND_BITS)
			>> SYSC_xCPARTL_AND_S;
	loops = ((part + 1) << (set_shift - 4));

	/* Where does the Set end? */
	end_set = flush0 + loops;
	set_size = loops;

#ifdef CONFIG_METAG_META12
	if ((bytes < 4096) && (bytes < loops)) {
		/* Unreachable on HTP/MTP */
		/* Only target the sets that could be relevant */
		flush0 += (loops - step) & ((int) start);
		loops = (((int) start) & (step-1)) + bytes + step - 1;
	}
#endif

	/* Reduce loops by step of cache line size */
	loops /= step;

	flush1 = flush0 + (1<<set_shift);
	flush2 = flush0 + (2<<set_shift);
	flush3 = flush0 + (3<<set_shift);

	if (icache_sets_log2 == 1) {
		flush2 = flush1;
		flush3 = flush1 + step;
		flush1 = flush0 + step;
#if 0
		/* flush0 will stop one line early in this case
		 * (flush1 will do the final line).
		 * However we don't correct end_set here at the moment
		 * because it will never wrap on HTP/MTP
		 */
		end_set -= step;
#endif
		step  <<= 1;
		loops >>= 1;
	}

	/* Clear loops ways in cache */
	while (loops-- != 0) {
#if 0
		/*
		 * GCC doesn't generate very good code for this so we
		 * provide inline assembly instead.
		 */
		/* Clear the ways */
		metag_out8(0, flush0);
		metag_out8(0, flush1);
		metag_out8(0, flush2);
		metag_out8(0, flush3);

		flush0 += step;
		flush1 += step;
		flush2 += step;
		flush3 += step;
#else
		asm volatile (
			"SETB\t[%0+%4++],%5\n"
			"SETB\t[%1+%4++],%5\n"
			"SETB\t[%2+%4++],%5\n"
			"SETB\t[%3+%4++],%5\n"
			: "+e" (flush0),
			  "+e" (flush1),
			  "+e" (flush2),
			  "+e" (flush3)
			: "e" (step), "a" (0));
#endif

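		/*
		 * Wrapping matters for the Meta1 partial flush above, where
		 * flush0 can start part-way through the partition: once the
		 * end of the partition is reached the pointers wrap back by
		 * set_size so the remaining lines at its start still get
		 * flushed.
		 */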
		if (flush0 == end_set) {
			/* Wrap within Set 0 */
			flush0 -= set_size;
			flush1 -= set_size;
			flush2 -= set_size;
			flush3 -= set_size;
		}
	}
}

void metag_code_cache_flush_all(const void *start)
{
	if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
		/* No need to flush the code cache; it's not actually enabled */
		return;

	metag_phys_code_cache_flush(start, 4096);
}
EXPORT_SYMBOL(metag_code_cache_flush_all);

void metag_code_cache_flush(const void *start, int bytes)
{
#ifndef CONFIG_METAG_META12
	void *flush;
	int loops, step;
#endif /* !CONFIG_METAG_META12 */

	if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
		/* No need to flush the code cache; it's not actually enabled */
		return;

#ifdef CONFIG_METAG_META12
	/* CACHEWD isn't available on Meta1, so always do full cache flush */
	metag_phys_code_cache_flush(start, bytes);

#else /* CONFIG_METAG_META12 */
	/* If large size do full physical cache flush */
	if (bytes >= 4096) {
		metag_phys_code_cache_flush(start, bytes);
		return;
	}

	/* Use linear cache flush mechanism on META IP */
	flush = (void *)((int)start & ~(ICACHE_LINE_BYTES-1));
	loops  = ((int)start & (ICACHE_LINE_BYTES-1)) + bytes +
		(ICACHE_LINE_BYTES-1);
	loops  >>= ICACHE_LINE_S;

#define PRIM_IFLUSH(addr, offset) \
	__builtin_meta2_cachewd(((addr) + ((offset) * 64)), CACHEW_ICACHE_BIT)

#define LOOP_INC (4*64)

	do {
		/* By default stop */
		step = 0;

		switch (loops) {
		/* Drop Thru Cases! */
		default:
			PRIM_IFLUSH(flush, 3);
			loops -= 4;
			step = 1;
		case 3:
			PRIM_IFLUSH(flush, 2);
		case 2:
			PRIM_IFLUSH(flush, 1);
		case 1:
			PRIM_IFLUSH(flush, 0);
			flush += LOOP_INC;
		case 0:
			break;
		}
	} while (step);
#endif /* !CONFIG_METAG_META12 */
}
EXPORT_SYMBOL(metag_code_cache_flush);