1/*
2 *  linux/drivers/video/kyro/STG4000OverlayDevice.c
3 *
4 *  Copyright (C) 2000 Imagination Technologies Ltd
5 *  Copyright (C) 2002 STMicroelectronics
6 *
7 * This file is subject to the terms and conditions of the GNU General Public
8 * License.  See the file COPYING in the main directory of this archive
9 * for more details.
10 */
11
12#include <linux/kernel.h>
13#include <linux/errno.h>
14#include <linux/types.h>
15
16#include "STG4000Reg.h"
17#include "STG4000Interface.h"
18
/* HW Defines */

/*
 * Scale factor 1.0 in the fixed point format used for the scaler values in
 * this file (11 fractional bits, i.e. 1 << 11): the overlay is not scaled.
 */
#define STG4000_NO_SCALING    0x800
/* Vertical decimation pattern with all 32 bits set: no line is dropped. */
#define STG4000_NO_DECIMATION 0xFFFFFFFF

/* Primary surface (none of these are referenced by the code visible in this file) */
#define STG4000_PRIM_NUM_PIX   5
#define STG4000_PRIM_ALIGN     4
#define STG4000_PRIM_ADDR_BITS 20

#define STG4000_PRIM_MIN_WIDTH  640
#define STG4000_PRIM_MAX_WIDTH  1600
#define STG4000_PRIM_MIN_HEIGHT 480
#define STG4000_PRIM_MAX_HEIGHT 1200

/* Overlay surface */
#define STG4000_OVRL_NUM_PIX   4
#define STG4000_OVRL_ALIGN     2
#define STG4000_OVRL_ADDR_BITS 20
#define STG4000_OVRL_NUM_MODES 5

/*
 * Overlay size limits in pixels.  Only the two MAX values are checked in
 * this file, by CreateOverlaySurface().
 */
#define STG4000_OVRL_MIN_WIDTH  0
#define STG4000_OVRL_MAX_WIDTH  720
#define STG4000_OVRL_MIN_HEIGHT 0
#define STG4000_OVRL_MAX_HEIGHT 576
44
/*
 * Decimation and Scaling
 *
 * Vertical decimation patterns, indexed by the number of source lines to
 * drop out of every group of 32.  Entry n has exactly n zero bits, spread
 * as evenly as possible over the word; Overlap() counts a zero bit as one
 * decimated line.  Entry 0 (all ones, same value as STG4000_NO_DECIMATION)
 * keeps every line, entry 32 keeps none.  The selected pattern is written
 * unchanged to DACOverlayVtDec.
 *
 * The table is only ever read, hence const.
 */
static const u32 adwDecim8[33] = {
	0xffffffff, 0xfffeffff, 0xffdffbff, 0xfefefeff, 0xfdf7efbf,
	0xfbdf7bdf, 0xf7bbddef, 0xeeeeeeef, 0xeeddbb77, 0xedb76db7,
	0xdb6db6db, 0xdb5b5b5b, 0xdab5ad6b, 0xd5ab55ab, 0xd555aaab,
	0xaaaaaaab, 0xaaaa5555, 0xaa952a55, 0xa94a5295, 0xa5252525,
	0xa4924925, 0x92491249, 0x91224489, 0x91111111, 0x90884211,
	0x88410821, 0x88102041, 0x81010101, 0x80800801, 0x80010001,
	0x80000001, 0x00000001, 0x00000000
};
55
/*
 * Source and destination rectangles of the overlay, all in pixels.
 * The ulDst and ulSrc members hold the clipped rectangles, the lDst members
 * the (signed) on-screen position of the overlay.
 */
typedef struct _OVRL_SRC_DEST {
	/* clipped on-screen pixel position of the overlay */
	u32 ulDstX1;
	u32 ulDstY1;
	u32 ulDstX2;
	u32 ulDstY2;

	/*
	 * clipped pixel position of the source data within the buffer;
	 * these need to be 128 bit word aligned
	 */
	u32 ulSrcX1;
	u32 ulSrcY1;
	u32 ulSrcX2;
	u32 ulSrcY2;

	/* on-screen pixel position of the overlay */
	s32 lDstX1;
	s32 lDstY1;
	s32 lDstX2;
	s32 lDstY2;
} OVRL_SRC_DEST;
75
/*
 * Geometry of the current overlay surface.  Written by
 * CreateOverlaySurface() and read by SetOverlayViewPort().
 * ovlWidth/ovlHeight are in pixels, ovlStride is in 16 byte words, and
 * ovlLinear is non-zero for the linear format, zero for the planar one.
 */
static u32 ovlWidth, ovlHeight, ovlStride;
static int ovlLinear;
78
79void ResetOverlayRegisters(volatile STG4000REG __iomem *pSTGReg)
80{
81	u32 tmp;
82
83	/* Set Overlay address to default */
84	tmp = STG_READ_REG(DACOverlayAddr);
85	CLEAR_BITS_FRM_TO(0, 20);
86	CLEAR_BIT(31);
87	STG_WRITE_REG(DACOverlayAddr, tmp);
88
89	/* Set Overlay U address */
90	tmp = STG_READ_REG(DACOverlayUAddr);
91	CLEAR_BITS_FRM_TO(0, 20);
92	STG_WRITE_REG(DACOverlayUAddr, tmp);
93
94	/* Set Overlay V address */
95	tmp = STG_READ_REG(DACOverlayVAddr);
96	CLEAR_BITS_FRM_TO(0, 20);
97	STG_WRITE_REG(DACOverlayVAddr, tmp);
98
99	/* Set Overlay Size */
100	tmp = STG_READ_REG(DACOverlaySize);
101	CLEAR_BITS_FRM_TO(0, 10);
102	CLEAR_BITS_FRM_TO(12, 31);
103	STG_WRITE_REG(DACOverlaySize, tmp);
104
105	/* Set Overlay Vt Decimation */
106	tmp = STG4000_NO_DECIMATION;
107	STG_WRITE_REG(DACOverlayVtDec, tmp);
108
109	/* Set Overlay format to default value */
110	tmp = STG_READ_REG(DACPixelFormat);
111	CLEAR_BITS_FRM_TO(4, 7);
112	CLEAR_BITS_FRM_TO(16, 22);
113	STG_WRITE_REG(DACPixelFormat, tmp);
114
115	/* Set Vertical scaling to default */
116	tmp = STG_READ_REG(DACVerticalScal);
117	CLEAR_BITS_FRM_TO(0, 11);
118	CLEAR_BITS_FRM_TO(16, 22);
119	tmp |= STG4000_NO_SCALING;	/* Set to no scaling */
120	STG_WRITE_REG(DACVerticalScal, tmp);
121
122	/* Set Horizontal Scaling to default */
123	tmp = STG_READ_REG(DACHorizontalScal);
124	CLEAR_BITS_FRM_TO(0, 11);
125	CLEAR_BITS_FRM_TO(16, 17);
126	tmp |= STG4000_NO_SCALING;	/* Set to no scaling */
127	STG_WRITE_REG(DACHorizontalScal, tmp);
128
129	/* Set Blend mode to Alpha Blend */
130	/* ????? SG 08/11/2001 Surely this isn't the alpha blend mode,
131	   hopefully its overwrite
132	 */
133	tmp = STG_READ_REG(DACBlendCtrl);
134	CLEAR_BITS_FRM_TO(0, 30);
135	tmp = (GRAPHICS_MODE << 28);
136	STG_WRITE_REG(DACBlendCtrl, tmp);
137
138}
139
140int CreateOverlaySurface(volatile STG4000REG __iomem *pSTGReg,
141			 u32 inWidth,
142			 u32 inHeight,
143			 int bLinear,
144			 u32 ulOverlayOffset,
145			 u32 * retStride, u32 * retUVStride)
146{
147	u32 tmp;
148	u32 ulStride;
149
150	if (inWidth > STG4000_OVRL_MAX_WIDTH ||
151	    inHeight > STG4000_OVRL_MAX_HEIGHT) {
152		return -EINVAL;
153	}
154
155	/* Stride in 16 byte words - 16Bpp */
156	if (bLinear) {
157		/* Format is 16bits so num 16 byte words is width/8 */
158		if ((inWidth & 0x7) == 0) {	/* inWidth % 8 */
159			ulStride = (inWidth / 8);
160		} else {
161			/* Round up to next 16byte boundary */
162			ulStride = ((inWidth + 8) / 8);
163		}
164	} else {
165		/* Y component is 8bits so num 16 byte words is width/16 */
166		if ((inWidth & 0xf) == 0) {	/* inWidth % 16 */
167			ulStride = (inWidth / 16);
168		} else {
169			/* Round up to next 16byte boundary */
170			ulStride = ((inWidth + 16) / 16);
171		}
172	}
173
174
175	/* Set Overlay address and Format mode */
176	tmp = STG_READ_REG(DACOverlayAddr);
177	CLEAR_BITS_FRM_TO(0, 20);
178	if (bLinear) {
179		CLEAR_BIT(31);	/* Overlay format to Linear */
180	} else {
181		tmp |= SET_BIT(31);	/* Overlay format to Planer */
182	}
183
184	/* Only bits 24:4 of the Overlay address */
185	tmp |= (ulOverlayOffset >> 4);
186	STG_WRITE_REG(DACOverlayAddr, tmp);
187
188	if (!bLinear) {
189		u32 uvSize =
190		    (inWidth & 0x1) ? (inWidth + 1 / 2) : (inWidth / 2);
191		u32 uvStride;
192		u32 ulOffset;
193		/* Y component is 8bits so num 32 byte words is width/32 */
194		if ((uvSize & 0xf) == 0) {	/* inWidth % 16 */
195			uvStride = (uvSize / 16);
196		} else {
197			/* Round up to next 32byte boundary */
198			uvStride = ((uvSize + 16) / 16);
199		}
200
201		ulOffset = ulOverlayOffset + (inHeight * (ulStride * 16));
202		/* Align U,V data to 32byte boundary */
203		if ((ulOffset & 0x1f) != 0)
204			ulOffset = (ulOffset + 32L) & 0xffffffE0L;
205
206		tmp = STG_READ_REG(DACOverlayUAddr);
207		CLEAR_BITS_FRM_TO(0, 20);
208		tmp |= (ulOffset >> 4);
209		STG_WRITE_REG(DACOverlayUAddr, tmp);
210
211		ulOffset += (inHeight / 2) * (uvStride * 16);
212		/* Align U,V data to 32byte boundary */
213		if ((ulOffset & 0x1f) != 0)
214			ulOffset = (ulOffset + 32L) & 0xffffffE0L;
215
216		tmp = STG_READ_REG(DACOverlayVAddr);
217		CLEAR_BITS_FRM_TO(0, 20);
218		tmp |= (ulOffset >> 4);
219		STG_WRITE_REG(DACOverlayVAddr, tmp);
220
221		*retUVStride = uvStride * 16;
222	}
223
224
225	/* Set Overlay YUV pixel format
226	 * Make sure that LUT not used - ??????
227	 */
228	tmp = STG_READ_REG(DACPixelFormat);
229	/* Only support Planer or UYVY linear formats */
230	CLEAR_BITS_FRM_TO(4, 9);
231	STG_WRITE_REG(DACPixelFormat, tmp);
232
233	ovlWidth = inWidth;
234	ovlHeight = inHeight;
235	ovlStride = ulStride;
236	ovlLinear = bLinear;
237	*retStride = ulStride << 4;	/* In bytes */
238
239	return 0;
240}
241
242int SetOverlayBlendMode(volatile STG4000REG __iomem *pSTGReg,
243			OVRL_BLEND_MODE mode,
244			u32 ulAlpha, u32 ulColorKey)
245{
246	u32 tmp;
247
248	tmp = STG_READ_REG(DACBlendCtrl);
249	CLEAR_BITS_FRM_TO(28, 30);
250	tmp |= (mode << 28);
251
252	switch (mode) {
253	case COLOR_KEY:
254		CLEAR_BITS_FRM_TO(0, 23);
255		tmp |= (ulColorKey & 0x00FFFFFF);
256		break;
257
258	case GLOBAL_ALPHA:
259		CLEAR_BITS_FRM_TO(24, 27);
260		tmp |= ((ulAlpha & 0xF) << 24);
261		break;
262
263	case CK_PIXEL_ALPHA:
264		CLEAR_BITS_FRM_TO(0, 23);
265		tmp |= (ulColorKey & 0x00FFFFFF);
266		break;
267
268	case CK_GLOBAL_ALPHA:
269		CLEAR_BITS_FRM_TO(0, 23);
270		tmp |= (ulColorKey & 0x00FFFFFF);
271		CLEAR_BITS_FRM_TO(24, 27);
272		tmp |= ((ulAlpha & 0xF) << 24);
273		break;
274
275	case GRAPHICS_MODE:
276	case PER_PIXEL_ALPHA:
277		break;
278
279	default:
280		return -EINVAL;
281	}
282
283	STG_WRITE_REG(DACBlendCtrl, tmp);
284
285	return 0;
286}
287
288void EnableOverlayPlane(volatile STG4000REG __iomem *pSTGReg)
289{
290	u32 tmp;
291	/* Enable Overlay */
292	tmp = STG_READ_REG(DACPixelFormat);
293	tmp |= SET_BIT(7);
294	STG_WRITE_REG(DACPixelFormat, tmp);
295
296	/* Set video stream control */
297	tmp = STG_READ_REG(DACStreamCtrl);
298	tmp |= SET_BIT(1);	/* video stream */
299	STG_WRITE_REG(DACStreamCtrl, tmp);
300}
301
/*
 * Count the zero bits among the lowest ulBits bits of ulPattern.
 *
 * In a decimation pattern a zero bit stands for a dropped line, so this is
 * the number of lines removed from a partial group of ulBits lines.  The
 * pattern is shifted right one bit per step; once it has been shifted out
 * completely only zeros remain, so for ulBits > 32 every further step is
 * counted as well.
 */
static u32 Overlap(u32 ulBits, u32 ulPattern)
{
	u32 ulZeros = 0;
	u32 i;

	for (i = 0; i < ulBits; i++, ulPattern >>= 1)
		ulZeros += (ulPattern & 1) ? 0 : 1;

	return ulZeros;
}
316
317int SetOverlayViewPort(volatile STG4000REG __iomem *pSTGReg,
318		       u32 left, u32 top,
319		       u32 right, u32 bottom)
320{
321	OVRL_SRC_DEST srcDest;
322
323	u32 ulSrcTop, ulSrcBottom;
324	u32 ulSrc, ulDest;
325	u32 ulFxScale, ulFxOffset;
326	u32 ulHeight, ulWidth;
327	u32 ulPattern;
328	u32 ulDecimate, ulDecimated;
329	u32 ulApplied;
330	u32 ulDacXScale, ulDacYScale;
331	u32 ulScale;
332	u32 ulLeft, ulRight;
333	u32 ulSrcLeft, ulSrcRight;
334	u32 ulScaleLeft, ulScaleRight;
335	u32 ulhDecim;
336	u32 ulsVal;
337	u32 ulVertDecFactor;
338	int bResult;
339	u32 ulClipOff = 0;
340	u32 ulBits = 0;
341	u32 ulsAdd = 0;
342	u32 tmp, ulStride;
343	u32 ulExcessPixels, ulClip, ulExtraLines;
344
345
346	srcDest.ulSrcX1 = 0;
347	srcDest.ulSrcY1 = 0;
348	srcDest.ulSrcX2 = ovlWidth - 1;
349	srcDest.ulSrcY2 = ovlHeight - 1;
350
351	srcDest.ulDstX1 = left;
352	srcDest.ulDstY1 = top;
353	srcDest.ulDstX2 = right;
354	srcDest.ulDstY2 = bottom;
355
356	srcDest.lDstX1 = srcDest.ulDstX1;
357	srcDest.lDstY1 = srcDest.ulDstY1;
358	srcDest.lDstX2 = srcDest.ulDstX2;
359	srcDest.lDstY2 = srcDest.ulDstY2;
360
361    /************* Vertical decimation/scaling ******************/
362
363	/* Get Src Top and Bottom */
364	ulSrcTop = srcDest.ulSrcY1;
365	ulSrcBottom = srcDest.ulSrcY2;
366
367	ulSrc = ulSrcBottom - ulSrcTop;
368	ulDest = srcDest.lDstY2 - srcDest.lDstY1;	/* on-screen overlay */
369
370	if (ulSrc <= 1)
371		return -EINVAL;
372
373	/* First work out the position we are to display as offset from the
374	 * source of the buffer
375	 */
376	ulFxScale = (ulDest << 11) / ulSrc;	/* fixed point scale factor */
377	ulFxOffset = (srcDest.lDstY2 - srcDest.ulDstY2) << 11;
378
379	ulSrcBottom = ulSrcBottom - (ulFxOffset / ulFxScale);
380	ulSrc = ulSrcBottom - ulSrcTop;
381	ulHeight = ulSrc;
382
383	ulDest = srcDest.ulDstY2 - (srcDest.ulDstY1 - 1);
384	ulPattern = adwDecim8[ulBits];
385
386	/* At this point ulSrc represents the input decimator */
387	if (ulSrc > ulDest) {
388		ulDecimate = ulSrc - ulDest;
389		ulBits = 0;
390		ulApplied = ulSrc / 32;
391
392		while (((ulBits * ulApplied) +
393			Overlap((ulSrc % 32),
394				adwDecim8[ulBits])) < ulDecimate)
395			ulBits++;
396
397		ulPattern = adwDecim8[ulBits];
398		ulDecimated =
399		    (ulBits * ulApplied) + Overlap((ulSrc % 32),
400						   ulPattern);
401		ulSrc = ulSrc - ulDecimated;	/* the number number of lines that will go into the scaler */
402	}
403
404	if (ulBits && (ulBits != 32)) {
405		ulVertDecFactor = (63 - ulBits) / (32 - ulBits);	/* vertical decimation factor scaled up to nearest integer */
406	} else {
407		ulVertDecFactor = 1;
408	}
409
410	ulDacYScale = ((ulSrc - 1) * 2048) / (ulDest + 1);
411
412	tmp = STG_READ_REG(DACOverlayVtDec);	/* Decimation */
413	CLEAR_BITS_FRM_TO(0, 31);
414	tmp = ulPattern;
415	STG_WRITE_REG(DACOverlayVtDec, tmp);
416
417	/***************** Horizontal decimation/scaling ***************************/
418
419	/*
420	 * Now we handle the horizontal case, this is a simplified version of
421	 * the vertical case in that we decimate by factors of 2.  as we are
422	 * working in words we should always be able to decimate by these
423	 * factors.  as we always have to have a buffer which is aligned to a
424	 * whole number of 128 bit words, we must align the left side to the
425	 * lowest to the next lowest 128 bit boundary, and the right hand edge
426	 * to the next largets boundary, (in a similar way to how we didi it in
427	 * PMX1) as the left and right hand edges are aligned to these
428	 * boundaries normally this only becomes an issue when we are chopping
429	 * of one of the sides We shall work out vertical stuff first
430	 */
431	ulSrc = srcDest.ulSrcX2 - srcDest.ulSrcX1;
432	ulDest = srcDest.lDstX2 - srcDest.lDstX1;
433#ifdef _OLDCODE
434	ulLeft = srcDest.ulDstX1;
435	ulRight = srcDest.ulDstX2;
436#else
437	if (srcDest.ulDstX1 > 2) {
438		ulLeft = srcDest.ulDstX1 + 2;
439		ulRight = srcDest.ulDstX2 + 1;
440	} else {
441		ulLeft = srcDest.ulDstX1;
442		ulRight = srcDest.ulDstX2 + 1;
443	}
444#endif
445	/* first work out the position we are to display as offset from the source of the buffer */
446	bResult = 1;
447
448	do {
449		if (ulDest == 0)
450			return -EINVAL;
451
452		/* source pixels per dest pixel <<11 */
453		ulFxScale = ((ulSrc - 1) << 11) / (ulDest);
454
455		/* then number of destination pixels out we are */
456		ulFxOffset = ulFxScale * ((srcDest.ulDstX1 - srcDest.lDstX1) + ulClipOff);
457		ulFxOffset >>= 11;
458
459		/* this replaces the code which was making a decision as to use either ulFxOffset or ulSrcX1 */
460		ulSrcLeft = srcDest.ulSrcX1 + ulFxOffset;
461
462		/* then number of destination pixels out we are */
463		ulFxOffset = ulFxScale * (srcDest.lDstX2 - srcDest.ulDstX2);
464		ulFxOffset >>= 11;
465
466		ulSrcRight = srcDest.ulSrcX2 - ulFxOffset;
467
468		/*
469		 * we must align these to our 128 bit boundaries. we shall
470		 * round down the pixel pos to the nearest 8 pixels.
471		 */
472		ulScaleLeft = ulSrcLeft;
473		ulScaleRight = ulSrcRight;
474
475		/* shift fxscale until it is in the range of the scaler */
476		ulhDecim = 0;
477		ulScale = (((ulSrcRight - ulSrcLeft) - 1) << (11 - ulhDecim)) / (ulRight - ulLeft + 2);
478
479		while (ulScale > 0x800) {
480			ulhDecim++;
481			ulScale = (((ulSrcRight - ulSrcLeft) - 1) << (11 - ulhDecim)) / (ulRight - ulLeft + 2);
482		}
483
484		/*
485		 * to try and get the best values We first try and use
486		 * src/dwdest for the scale factor, then we move onto src-1
487		 *
488		 * we want to check to see if we will need to clip data, if so
489		 * then we should clip our source so that we don't need to
490		 */
491		if (!ovlLinear) {
492			ulSrcLeft &= ~0x1f;
493
494			/*
495			 * we must align the right hand edge to the next 32
496			 * pixel` boundary, must be on a 256 boundary so u, and
497			 * v are 128 bit aligned
498			 */
499			ulSrcRight = (ulSrcRight + 0x1f) & ~0x1f;
500		} else {
501			ulSrcLeft &= ~0x7;
502
503			/*
504			 * we must align the right hand edge to the next
505			 * 8pixel` boundary
506			 */
507			ulSrcRight = (ulSrcRight + 0x7) & ~0x7;
508		}
509
510		/* this is the input size line store needs to cope with */
511		ulWidth = ulSrcRight - ulSrcLeft;
512
513		/*
514		 * use unclipped value to work out scale factror this is the
515		 * scale factor we want we shall now work out the horizonal
516		 * decimation and scaling
517		 */
518		ulsVal = ((ulWidth / 8) >> ulhDecim);
519
520		if ((ulWidth != (ulsVal << ulhDecim) * 8))
521			ulsAdd = 1;
522
523		/* input pixels to scaler; */
524		ulSrc = ulWidth >> ulhDecim;
525
526		if (ulSrc <= 2)
527			return -EINVAL;
528
529		ulExcessPixels = ((((ulScaleLeft - ulSrcLeft)) << (11 - ulhDecim)) / ulScale);
530
531		ulClip = (ulSrc << 11) / ulScale;
532		ulClip -= (ulRight - ulLeft);
533		ulClip += ulExcessPixels;
534
535		if (ulClip)
536			ulClip--;
537
538		/* We may need to do more here if we really have a HW rev < 5 */
539	} while (!bResult);
540
541	ulExtraLines = (1 << ulhDecim) * ulVertDecFactor;
542	ulExtraLines += 64;
543	ulHeight += ulExtraLines;
544
545	ulDacXScale = ulScale;
546
547
548	tmp = STG_READ_REG(DACVerticalScal);
549	CLEAR_BITS_FRM_TO(0, 11);
550	CLEAR_BITS_FRM_TO(16, 22);	/* Vertical Scaling */
551
552	/* Calculate new output line stride, this is always the number of 422
553	   words in the line buffer, so it doesn't matter if the
554	   mode is 420. Then set the vertical scale register.
555	 */
556	ulStride = (ulWidth >> (ulhDecim + 3)) + ulsAdd;
557	tmp |= ((ulStride << 16) | (ulDacYScale));	/* DAC_LS_CTRL = stride */
558	STG_WRITE_REG(DACVerticalScal, tmp);
559
560	/* Now set up the overlay size using the modified width and height
561	   from decimate and scaling calculations
562	 */
563	tmp = STG_READ_REG(DACOverlaySize);
564	CLEAR_BITS_FRM_TO(0, 10);
565	CLEAR_BITS_FRM_TO(12, 31);
566
567	if (ovlLinear) {
568		tmp |=
569		    (ovlStride | ((ulHeight + 1) << 12) |
570		     (((ulWidth / 8) - 1) << 23));
571	} else {
572		tmp |=
573		    (ovlStride | ((ulHeight + 1) << 12) |
574		     (((ulWidth / 32) - 1) << 23));
575	}
576
577	STG_WRITE_REG(DACOverlaySize, tmp);
578
579	/* Set Video Window Start */
580	tmp = ((ulLeft << 16)) | (srcDest.ulDstY1);
581	STG_WRITE_REG(DACVidWinStart, tmp);
582
583	/* Set Video Window End */
584	tmp = ((ulRight) << 16) | (srcDest.ulDstY2);
585	STG_WRITE_REG(DACVidWinEnd, tmp);
586
587	/* Finally set up the rest of the overlay regs in the order
588	   done in the IMG driver
589	 */
590	tmp = STG_READ_REG(DACPixelFormat);
591	tmp = ((ulExcessPixels << 16) | tmp) & 0x7fffffff;
592	STG_WRITE_REG(DACPixelFormat, tmp);
593
594	tmp = STG_READ_REG(DACHorizontalScal);
595	CLEAR_BITS_FRM_TO(0, 11);
596	CLEAR_BITS_FRM_TO(16, 17);
597	tmp |= ((ulhDecim << 16) | (ulDacXScale));
598	STG_WRITE_REG(DACHorizontalScal, tmp);
599
600	return 0;
601}
602