1/* 2 * Freescale GPMI NAND Flash Driver 3 * 4 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. 5 * Copyright (C) 2008 Embedded Alley Solutions, Inc. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License along 18 * with this program; if not, write to the Free Software Foundation, Inc., 19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 20 */ 21#include <linux/delay.h> 22#include <linux/clk.h> 23#include <linux/slab.h> 24 25#include "gpmi-nand.h" 26#include "gpmi-regs.h" 27#include "bch-regs.h" 28 29static struct timing_threshod timing_default_threshold = { 30 .max_data_setup_cycles = (BM_GPMI_TIMING0_DATA_SETUP >> 31 BP_GPMI_TIMING0_DATA_SETUP), 32 .internal_data_setup_in_ns = 0, 33 .max_sample_delay_factor = (BM_GPMI_CTRL1_RDN_DELAY >> 34 BP_GPMI_CTRL1_RDN_DELAY), 35 .max_dll_clock_period_in_ns = 32, 36 .max_dll_delay_in_ns = 16, 37}; 38 39#define MXS_SET_ADDR 0x4 40#define MXS_CLR_ADDR 0x8 41/* 42 * Clear the bit and poll it cleared. This is usually called with 43 * a reset address and mask being either SFTRST(bit 31) or CLKGATE 44 * (bit 30). 45 */ 46static int clear_poll_bit(void __iomem *addr, u32 mask) 47{ 48 int timeout = 0x400; 49 50 /* clear the bit */ 51 writel(mask, addr + MXS_CLR_ADDR); 52 53 /* 54 * SFTRST needs 3 GPMI clocks to settle, the reference manual 55 * recommends to wait 1us. 
56 */ 57 udelay(1); 58 59 /* poll the bit becoming clear */ 60 while ((readl(addr) & mask) && --timeout) 61 /* nothing */; 62 63 return !timeout; 64} 65 66#define MODULE_CLKGATE (1 << 30) 67#define MODULE_SFTRST (1 << 31) 68/* 69 * The current mxs_reset_block() will do two things: 70 * [1] enable the module. 71 * [2] reset the module. 72 * 73 * In most of the cases, it's ok. 74 * But in MX23, there is a hardware bug in the BCH block (see erratum #2847). 75 * If you try to soft reset the BCH block, it becomes unusable until 76 * the next hard reset. This case occurs in the NAND boot mode. When the board 77 * boots by NAND, the ROM of the chip will initialize the BCH blocks itself. 78 * So If the driver tries to reset the BCH again, the BCH will not work anymore. 79 * You will see a DMA timeout in this case. The bug has been fixed 80 * in the following chips, such as MX28. 81 * 82 * To avoid this bug, just add a new parameter `just_enable` for 83 * the mxs_reset_block(), and rewrite it here. 
84 */ 85static int gpmi_reset_block(void __iomem *reset_addr, bool just_enable) 86{ 87 int ret; 88 int timeout = 0x400; 89 90 /* clear and poll SFTRST */ 91 ret = clear_poll_bit(reset_addr, MODULE_SFTRST); 92 if (unlikely(ret)) 93 goto error; 94 95 /* clear CLKGATE */ 96 writel(MODULE_CLKGATE, reset_addr + MXS_CLR_ADDR); 97 98 if (!just_enable) { 99 /* set SFTRST to reset the block */ 100 writel(MODULE_SFTRST, reset_addr + MXS_SET_ADDR); 101 udelay(1); 102 103 /* poll CLKGATE becoming set */ 104 while ((!(readl(reset_addr) & MODULE_CLKGATE)) && --timeout) 105 /* nothing */; 106 if (unlikely(!timeout)) 107 goto error; 108 } 109 110 /* clear and poll SFTRST */ 111 ret = clear_poll_bit(reset_addr, MODULE_SFTRST); 112 if (unlikely(ret)) 113 goto error; 114 115 /* clear and poll CLKGATE */ 116 ret = clear_poll_bit(reset_addr, MODULE_CLKGATE); 117 if (unlikely(ret)) 118 goto error; 119 120 return 0; 121 122error: 123 pr_err("%s(%p): module reset timeout\n", __func__, reset_addr); 124 return -ETIMEDOUT; 125} 126 127static int __gpmi_enable_clk(struct gpmi_nand_data *this, bool v) 128{ 129 struct clk *clk; 130 int ret; 131 int i; 132 133 for (i = 0; i < GPMI_CLK_MAX; i++) { 134 clk = this->resources.clock[i]; 135 if (!clk) 136 break; 137 138 if (v) { 139 ret = clk_prepare_enable(clk); 140 if (ret) 141 goto err_clk; 142 } else { 143 clk_disable_unprepare(clk); 144 } 145 } 146 return 0; 147 148err_clk: 149 for (; i > 0; i--) 150 clk_disable_unprepare(this->resources.clock[i - 1]); 151 return ret; 152} 153 154#define gpmi_enable_clk(x) __gpmi_enable_clk(x, true) 155#define gpmi_disable_clk(x) __gpmi_enable_clk(x, false) 156 157int gpmi_init(struct gpmi_nand_data *this) 158{ 159 struct resources *r = &this->resources; 160 int ret; 161 162 ret = gpmi_enable_clk(this); 163 if (ret) 164 goto err_out; 165 ret = gpmi_reset_block(r->gpmi_regs, false); 166 if (ret) 167 goto err_out; 168 169 /* 170 * Reset BCH here, too. 
We got failures otherwise :( 171 * See later BCH reset for explanation of MX23 handling 172 */ 173 ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this)); 174 if (ret) 175 goto err_out; 176 177 178 /* Choose NAND mode. */ 179 writel(BM_GPMI_CTRL1_GPMI_MODE, r->gpmi_regs + HW_GPMI_CTRL1_CLR); 180 181 /* Set the IRQ polarity. */ 182 writel(BM_GPMI_CTRL1_ATA_IRQRDY_POLARITY, 183 r->gpmi_regs + HW_GPMI_CTRL1_SET); 184 185 /* Disable Write-Protection. */ 186 writel(BM_GPMI_CTRL1_DEV_RESET, r->gpmi_regs + HW_GPMI_CTRL1_SET); 187 188 /* Select BCH ECC. */ 189 writel(BM_GPMI_CTRL1_BCH_MODE, r->gpmi_regs + HW_GPMI_CTRL1_SET); 190 191 /* 192 * Decouple the chip select from dma channel. We use dma0 for all 193 * the chips. 194 */ 195 writel(BM_GPMI_CTRL1_DECOUPLE_CS, r->gpmi_regs + HW_GPMI_CTRL1_SET); 196 197 gpmi_disable_clk(this); 198 return 0; 199err_out: 200 return ret; 201} 202 203/* This function is very useful. It is called only when the bug occur. */ 204void gpmi_dump_info(struct gpmi_nand_data *this) 205{ 206 struct resources *r = &this->resources; 207 struct bch_geometry *geo = &this->bch_geometry; 208 u32 reg; 209 int i; 210 211 dev_err(this->dev, "Show GPMI registers :\n"); 212 for (i = 0; i <= HW_GPMI_DEBUG / 0x10 + 1; i++) { 213 reg = readl(r->gpmi_regs + i * 0x10); 214 dev_err(this->dev, "offset 0x%.3x : 0x%.8x\n", i * 0x10, reg); 215 } 216 217 /* start to print out the BCH info */ 218 dev_err(this->dev, "Show BCH registers :\n"); 219 for (i = 0; i <= HW_BCH_VERSION / 0x10 + 1; i++) { 220 reg = readl(r->bch_regs + i * 0x10); 221 dev_err(this->dev, "offset 0x%.3x : 0x%.8x\n", i * 0x10, reg); 222 } 223 dev_err(this->dev, "BCH Geometry :\n" 224 "GF length : %u\n" 225 "ECC Strength : %u\n" 226 "Page Size in Bytes : %u\n" 227 "Metadata Size in Bytes : %u\n" 228 "ECC Chunk Size in Bytes: %u\n" 229 "ECC Chunk Count : %u\n" 230 "Payload Size in Bytes : %u\n" 231 "Auxiliary Size in Bytes: %u\n" 232 "Auxiliary Status Offset: %u\n" 233 "Block Mark Byte Offset : %u\n" 234 
"Block Mark Bit Offset : %u\n", 235 geo->gf_len, 236 geo->ecc_strength, 237 geo->page_size, 238 geo->metadata_size, 239 geo->ecc_chunk_size, 240 geo->ecc_chunk_count, 241 geo->payload_size, 242 geo->auxiliary_size, 243 geo->auxiliary_status_offset, 244 geo->block_mark_byte_offset, 245 geo->block_mark_bit_offset); 246} 247 248/* Configures the geometry for BCH. */ 249int bch_set_geometry(struct gpmi_nand_data *this) 250{ 251 struct resources *r = &this->resources; 252 struct bch_geometry *bch_geo = &this->bch_geometry; 253 unsigned int block_count; 254 unsigned int block_size; 255 unsigned int metadata_size; 256 unsigned int ecc_strength; 257 unsigned int page_size; 258 unsigned int gf_len; 259 int ret; 260 261 if (common_nfc_set_geometry(this)) 262 return !0; 263 264 block_count = bch_geo->ecc_chunk_count - 1; 265 block_size = bch_geo->ecc_chunk_size; 266 metadata_size = bch_geo->metadata_size; 267 ecc_strength = bch_geo->ecc_strength >> 1; 268 page_size = bch_geo->page_size; 269 gf_len = bch_geo->gf_len; 270 271 ret = gpmi_enable_clk(this); 272 if (ret) 273 goto err_out; 274 275 /* 276 * Due to erratum #2847 of the MX23, the BCH cannot be soft reset on this 277 * chip, otherwise it will lock up. So we skip resetting BCH on the MX23. 278 * On the other hand, the MX28 needs the reset, because one case has been 279 * seen where the BCH produced ECC errors constantly after 10000 280 * consecutive reboots. The latter case has not been seen on the MX23 281 * yet, still we don't know if it could happen there as well. 282 */ 283 ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this)); 284 if (ret) 285 goto err_out; 286 287 /* Configure layout 0. 
*/ 288 writel(BF_BCH_FLASH0LAYOUT0_NBLOCKS(block_count) 289 | BF_BCH_FLASH0LAYOUT0_META_SIZE(metadata_size) 290 | BF_BCH_FLASH0LAYOUT0_ECC0(ecc_strength, this) 291 | BF_BCH_FLASH0LAYOUT0_GF(gf_len, this) 292 | BF_BCH_FLASH0LAYOUT0_DATA0_SIZE(block_size, this), 293 r->bch_regs + HW_BCH_FLASH0LAYOUT0); 294 295 writel(BF_BCH_FLASH0LAYOUT1_PAGE_SIZE(page_size) 296 | BF_BCH_FLASH0LAYOUT1_ECCN(ecc_strength, this) 297 | BF_BCH_FLASH0LAYOUT1_GF(gf_len, this) 298 | BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(block_size, this), 299 r->bch_regs + HW_BCH_FLASH0LAYOUT1); 300 301 /* Set *all* chip selects to use layout 0. */ 302 writel(0, r->bch_regs + HW_BCH_LAYOUTSELECT); 303 304 /* Enable interrupts. */ 305 writel(BM_BCH_CTRL_COMPLETE_IRQ_EN, 306 r->bch_regs + HW_BCH_CTRL_SET); 307 308 gpmi_disable_clk(this); 309 return 0; 310err_out: 311 return ret; 312} 313 314/* Converts time in nanoseconds to cycles. */ 315static unsigned int ns_to_cycles(unsigned int time, 316 unsigned int period, unsigned int min) 317{ 318 unsigned int k; 319 320 k = (time + period - 1) / period; 321 return max(k, min); 322} 323 324#define DEF_MIN_PROP_DELAY 5 325#define DEF_MAX_PROP_DELAY 9 326/* Apply timing to current hardware conditions. 
 */
/*
 * Derive the GPMI timing register values from the NAND timing in
 * this->timing and the current rate of r->clock[0], and store them in @hw.
 *
 * The target timing is copied locally and relaxed when more than one chip
 * is present. The clock rate is also recorded in the file-scope
 * timing_default_threshold. Two algorithms are used for the sample delay:
 * a simple one when tREA/tRLOH/tRHOH are not all available (negative), and
 * an eye-centering one otherwise.
 *
 * Always returns 0.
 */
static int gpmi_nfc_compute_hardware_timing(struct gpmi_nand_data *this,
					struct gpmi_nfc_hardware_timing *hw)
{
	struct timing_threshod *nfc = &timing_default_threshold;
	struct resources *r = &this->resources;
	struct nand_chip *nand = &this->nand;
	struct nand_timing target = this->timing;
	bool improved_timing_is_available;
	unsigned long clock_frequency_in_hz;
	unsigned int clock_period_in_ns;
	bool dll_use_half_periods;
	unsigned int dll_delay_shift;
	unsigned int max_sample_delay_in_ns;
	unsigned int address_setup_in_cycles;
	unsigned int data_setup_in_ns;
	unsigned int data_setup_in_cycles;
	unsigned int data_hold_in_cycles;
	int ideal_sample_delay_in_ns;
	unsigned int sample_delay_factor;
	int tEYE;
	unsigned int min_prop_delay_in_ns = DEF_MIN_PROP_DELAY;
	unsigned int max_prop_delay_in_ns = DEF_MAX_PROP_DELAY;

	/*
	 * If there are multiple chips, we need to relax the timings to allow
	 * for signal distortion due to higher capacitance.
	 */
	if (nand->numchips > 2) {
		target.data_setup_in_ns    += 10;
		target.data_hold_in_ns     += 10;
		target.address_setup_in_ns += 10;
	} else if (nand->numchips > 1) {
		target.data_setup_in_ns    += 5;
		target.data_hold_in_ns     += 5;
		target.address_setup_in_ns += 5;
	}

	/* Check if improved timing information is available. */
	improved_timing_is_available =
		(target.tREA_in_ns  >= 0) &&
		(target.tRLOH_in_ns >= 0) &&
		(target.tRHOH_in_ns >= 0);

	/* Inspect the clock. */
	nfc->clock_frequency_in_hz = clk_get_rate(r->clock[0]);
	clock_frequency_in_hz = nfc->clock_frequency_in_hz;
	/* NOTE(review): divides by the clock rate; assumes it is non-zero. */
	clock_period_in_ns    = NSEC_PER_SEC / clock_frequency_in_hz;

	/*
	 * The NFC quantizes setup and hold parameters in terms of clock cycles.
	 * Here, we quantize the setup and hold timing parameters to the
	 * next-highest clock period to make sure we apply at least the
	 * specified times.
	 *
	 * For data setup and data hold, the hardware interprets a value of zero
	 * as the largest possible delay. This is not what's intended by a zero
	 * in the input parameter, so we impose a minimum of one cycle.
	 */
	data_setup_in_cycles    = ns_to_cycles(target.data_setup_in_ns,
							clock_period_in_ns, 1);
	data_hold_in_cycles     = ns_to_cycles(target.data_hold_in_ns,
							clock_period_in_ns, 1);
	address_setup_in_cycles = ns_to_cycles(target.address_setup_in_ns,
							clock_period_in_ns, 0);

	/*
	 * The clock's period affects the sample delay in a number of ways:
	 *
	 * (1) The NFC HAL tells us the maximum clock period the sample delay
	 *     DLL can tolerate. If the clock period is greater than half that
	 *     maximum, we must configure the DLL to be driven by half periods.
	 *
	 * (2) We need to convert from an ideal sample delay, in ns, to a
	 *     "sample delay factor," which the NFC uses. This factor depends on
	 *     whether we're driving the DLL with full or half periods.
	 *     Paraphrasing the reference manual:
	 *
	 *         AD = SDF x 0.125 x RP
	 *
	 * where:
	 *
	 *     AD   is the applied delay, in ns.
	 *     SDF  is the sample delay factor, which is dimensionless.
	 *     RP   is the reference period, in ns, which is a full clock period
	 *          if the DLL is being driven by full periods, or half that if
	 *          the DLL is being driven by half periods.
	 *
	 * Let's re-arrange this in a way that's more useful to us:
	 *
	 *                        8
	 *         SDF  =  AD x ----
	 *                       RP
	 *
	 * The reference period is either the clock period or half that, so this
	 * is:
	 *
	 *                        8       AD x DDF
	 *         SDF  =  AD x -----  =  --------
	 *                      f x P        P
	 *
	 * where:
	 *
	 *       f  is 1 or 1/2, depending on how we're driving the DLL.
	 *       P  is the clock period.
	 *     DDF  is the DLL Delay Factor, a dimensionless value that
	 *          incorporates all the constants in the conversion.
	 *
	 * DDF will be either 8 or 16, both of which are powers of two. We can
	 * reduce the cost of this conversion by using bit shifts instead of
	 * multiplication or division. Thus:
	 *
	 *                 AD << DDS
	 *         SDF  =  ---------
	 *                     P
	 *
	 *     or
	 *
	 *         AD  =  (SDF >> DDS) x P
	 *
	 * where:
	 *
	 *     DDS  is the DLL Delay Shift, the logarithm to base 2 of the DDF.
	 */
	if (clock_period_in_ns > (nfc->max_dll_clock_period_in_ns >> 1)) {
		dll_use_half_periods = true;
		dll_delay_shift      = 3 + 1;
	} else {
		dll_use_half_periods = false;
		dll_delay_shift      = 3;
	}

	/*
	 * Compute the maximum sample delay the NFC allows, under current
	 * conditions. If the clock is running too slowly, no sample delay is
	 * possible.
	 */
	if (clock_period_in_ns > nfc->max_dll_clock_period_in_ns)
		max_sample_delay_in_ns = 0;
	else {
		/*
		 * Compute the delay implied by the largest sample delay factor
		 * the NFC allows.
		 */
		max_sample_delay_in_ns =
			(nfc->max_sample_delay_factor * clock_period_in_ns) >>
								dll_delay_shift;

		/*
		 * Check if the implied sample delay is larger than the NFC
		 * actually allows.
		 */
		if (max_sample_delay_in_ns > nfc->max_dll_delay_in_ns)
			max_sample_delay_in_ns = nfc->max_dll_delay_in_ns;
	}

	/*
	 * Check if improved timing information is available. If not, we have to
	 * use a less-sophisticated algorithm.
	 */
	if (!improved_timing_is_available) {
		/*
		 * Fold the read setup time required by the NFC into the ideal
		 * sample delay.
		 */
		ideal_sample_delay_in_ns = target.gpmi_sample_delay_in_ns +
						nfc->internal_data_setup_in_ns;

		/*
		 * The ideal sample delay may be greater than the maximum
		 * allowed by the NFC. If so, we can trade off sample delay time
		 * for more data setup time.
		 *
		 * In each iteration of the following loop, we add a cycle to
		 * the data setup time and subtract a corresponding amount from
		 * the sample delay until we've satisfied the constraints or
		 * can't do any better.
		 */
		while ((ideal_sample_delay_in_ns > max_sample_delay_in_ns) &&
			(data_setup_in_cycles < nfc->max_data_setup_cycles)) {

			data_setup_in_cycles++;
			ideal_sample_delay_in_ns -= clock_period_in_ns;

			if (ideal_sample_delay_in_ns < 0)
				ideal_sample_delay_in_ns = 0;

		}

		/*
		 * Compute the sample delay factor that corresponds most closely
		 * to the ideal sample delay. If the result is too large for the
		 * NFC, use the maximum value.
		 *
		 * Notice that we use the ns_to_cycles function to compute the
		 * sample delay factor. We do this because the form of the
		 * computation is the same as that for calculating cycles.
		 */
		sample_delay_factor =
			ns_to_cycles(
				ideal_sample_delay_in_ns << dll_delay_shift,
							clock_period_in_ns, 0);

		if (sample_delay_factor > nfc->max_sample_delay_factor)
			sample_delay_factor = nfc->max_sample_delay_factor;

		/* Skip to the part where we return our results. */
		goto return_results;
	}

	/*
	 * If control arrives here, we have more detailed timing information,
	 * so we can use a better algorithm.
	 */

	/*
	 * Fold the read setup time required by the NFC into the maximum
	 * propagation delay.
	 */
	max_prop_delay_in_ns += nfc->internal_data_setup_in_ns;

	/*
	 * Earlier, we computed the number of clock cycles required to satisfy
	 * the data setup time. Now, we need to know the actual nanoseconds.
	 */
	data_setup_in_ns = clock_period_in_ns * data_setup_in_cycles;

	/*
	 * Compute tEYE, the width of the data eye when reading from the NAND
	 * Flash. The eye width is fundamentally determined by the data setup
	 * time, perturbed by propagation delays and some characteristics of the
	 * NAND Flash device.
	 *
	 * start of the eye = max_prop_delay + tREA
	 * end of the eye   = min_prop_delay + tRHOH + data_setup
	 */
	tEYE = (int)min_prop_delay_in_ns + (int)target.tRHOH_in_ns +
							(int)data_setup_in_ns;

	tEYE -= (int)max_prop_delay_in_ns + (int)target.tREA_in_ns;

	/*
	 * The eye must be open. If it's not, we can try to open it by
	 * increasing its main forcer, the data setup time.
	 *
	 * In each iteration of the following loop, we increase the data setup
	 * time by a single clock cycle. We do this until either the eye is
	 * open or we run into NFC limits.
	 */
	while ((tEYE <= 0) &&
			(data_setup_in_cycles < nfc->max_data_setup_cycles)) {
		/* Give a cycle to data setup. */
		data_setup_in_cycles++;
		/* Synchronize the data setup time with the cycles. */
		data_setup_in_ns += clock_period_in_ns;
		/* Adjust tEYE accordingly. */
		tEYE += clock_period_in_ns;
	}

	/*
	 * When control arrives here, the eye is open. The ideal time to sample
	 * the data is in the center of the eye:
	 *
	 *     end of the eye + start of the eye
	 *     ---------------------------------  -  data_setup
	 *                    2
	 *
	 * After some algebra, this simplifies to the code immediately below.
	 */
	ideal_sample_delay_in_ns =
		((int)max_prop_delay_in_ns +
			(int)target.tREA_in_ns +
				(int)min_prop_delay_in_ns +
					(int)target.tRHOH_in_ns -
						(int)data_setup_in_ns) >> 1;

	/*
	 * The following figure illustrates some aspects of a NAND Flash read:
	 *
	 *
	 *           __                   _____________________________________
	 * RDN         \_________________/
	 *
	 *                                         <---- tEYE ----->
	 *                                        /-----------------\
	 * Read Data ----------------------------<                   >---------
	 *                                        \-----------------/
	 *             ^                 ^                 ^              ^
	 *             |                 |                 |              |
	 *             |<--Data Setup -->|<--Delay Time -->|              |
	 *             |                 |                 |              |
	 *             |                 |                                |
	 *             |                 |<--   Quantized Delay Time   -->|
	 *             |                 |                                |
	 *
	 *
	 * We have some issues we must now address:
	 *
	 * (1) The *ideal* sample delay time must not be negative. If it is, we
	 *     jam it to zero.
	 *
	 * (2) The *ideal* sample delay time must not be greater than that
	 *     allowed by the NFC. If it is, we can increase the data setup
	 *     time, which will reduce the delay between the end of the data
	 *     setup and the center of the eye. It will also make the eye
	 *     larger, which might help with the next issue...
	 *
	 * (3) The *quantized* sample delay time must not fall either before the
	 *     eye opens or after it closes (the latter is the problem
	 *     illustrated in the above figure).
	 */

	/* Jam a negative ideal sample delay to zero. */
	if (ideal_sample_delay_in_ns < 0)
		ideal_sample_delay_in_ns = 0;

	/*
	 * Extend the data setup as needed to reduce the ideal sample delay
	 * below the maximum permitted by the NFC.
	 */
	while ((ideal_sample_delay_in_ns > max_sample_delay_in_ns) &&
			(data_setup_in_cycles < nfc->max_data_setup_cycles)) {

		/* Give a cycle to data setup. */
		data_setup_in_cycles++;
		/* Synchronize the data setup time with the cycles. */
		data_setup_in_ns += clock_period_in_ns;
		/* Adjust tEYE accordingly. */
		tEYE += clock_period_in_ns;

		/*
		 * Decrease the ideal sample delay by one half cycle, to keep it
		 * in the middle of the eye.
		 */
		ideal_sample_delay_in_ns -= (clock_period_in_ns >> 1);

		/* Jam a negative ideal sample delay to zero. */
		if (ideal_sample_delay_in_ns < 0)
			ideal_sample_delay_in_ns = 0;
	}

	/*
	 * Compute the sample delay factor that corresponds to the ideal sample
	 * delay. If the result is too large, then use the maximum allowed
	 * value.
	 *
	 * Notice that we use the ns_to_cycles function to compute the sample
	 * delay factor. We do this because the form of the computation is the
	 * same as that for calculating cycles.
	 */
	sample_delay_factor =
		ns_to_cycles(ideal_sample_delay_in_ns << dll_delay_shift,
							clock_period_in_ns, 0);

	if (sample_delay_factor > nfc->max_sample_delay_factor)
		sample_delay_factor = nfc->max_sample_delay_factor;

	/*
	 * These macros conveniently encapsulate a computation we'll use to
	 * continuously evaluate whether or not the data sample delay is inside
	 * the eye. They read the locals above each time they are expanded.
	 */
	#define IDEAL_DELAY  ((int) ideal_sample_delay_in_ns)

	#define QUANTIZED_DELAY  \
		((int) ((sample_delay_factor * clock_period_in_ns) >> \
							dll_delay_shift))

	#define DELAY_ERROR  (abs(QUANTIZED_DELAY - IDEAL_DELAY))

	#define SAMPLE_IS_NOT_WITHIN_THE_EYE  (DELAY_ERROR > (tEYE >> 1))

	/*
	 * While the quantized sample time falls outside the eye, reduce the
	 * sample delay or extend the data setup to move the sampling point back
	 * toward the eye. Do not allow the number of data setup cycles to
	 * exceed the maximum allowed by the NFC.
	 */
	while (SAMPLE_IS_NOT_WITHIN_THE_EYE &&
			(data_setup_in_cycles < nfc->max_data_setup_cycles)) {
		/*
		 * If control arrives here, the quantized sample delay falls
		 * outside the eye. Check if it's before the eye opens, or after
		 * the eye closes.
		 */
		if (QUANTIZED_DELAY > IDEAL_DELAY) {
			/*
			 * If control arrives here, the quantized sample delay
			 * falls after the eye closes. Decrease the quantized
			 * delay time and then go back to re-evaluate.
			 */
			if (sample_delay_factor != 0)
				sample_delay_factor--;
			continue;
		}

		/*
		 * If control arrives here, the quantized sample delay falls
		 * before the eye opens. Shift the sample point by increasing
		 * data setup time. This will also make the eye larger.
		 */

		/* Give a cycle to data setup. */
		data_setup_in_cycles++;
		/* Synchronize the data setup time with the cycles. */
		data_setup_in_ns += clock_period_in_ns;
		/* Adjust tEYE accordingly. */
		tEYE += clock_period_in_ns;

		/*
		 * Decrease the ideal sample delay by one half cycle, to keep it
		 * in the middle of the eye.
		 */
		ideal_sample_delay_in_ns -= (clock_period_in_ns >> 1);

		/* ...and one less period for the delay time. */
		ideal_sample_delay_in_ns -= clock_period_in_ns;

		/* Jam a negative ideal sample delay to zero. */
		if (ideal_sample_delay_in_ns < 0)
			ideal_sample_delay_in_ns = 0;

		/*
		 * We have a new ideal sample delay, so re-compute the quantized
		 * delay.
		 */
		sample_delay_factor =
			ns_to_cycles(
				ideal_sample_delay_in_ns << dll_delay_shift,
							clock_period_in_ns, 0);

		if (sample_delay_factor > nfc->max_sample_delay_factor)
			sample_delay_factor = nfc->max_sample_delay_factor;
	}

	/* Control arrives here when we're ready to return our results. */
return_results:
	hw->data_setup_in_cycles    = data_setup_in_cycles;
	hw->data_hold_in_cycles     = data_hold_in_cycles;
	hw->address_setup_in_cycles = address_setup_in_cycles;
	hw->use_half_periods        = dll_use_half_periods;
	hw->sample_delay_factor     = sample_delay_factor;
	hw->device_busy_timeout     = GPMI_DEFAULT_BUSY_TIMEOUT;
	hw->wrn_dly_sel             = BV_GPMI_CTRL1_WRN_DLY_SEL_4_TO_8NS;

	/* Return success. */
	return 0;
}

/*
 * <1> Firstly, we should know what the GPMI-clock means.
 *     The GPMI-clock is the internal clock in the gpmi nand controller.
 *     If you set 100MHz to gpmi nand controller, the GPMI-clock's period
 *     is 10ns. Mark the GPMI-clock's period as GPMI-clock-period.
 *
 * <2> Secondly, we should know what the frequency on the nand chip pins is.
 *     The frequency on the nand chip pins is derived from the GPMI-clock.
 *     We can get it from the following equation:
 *
 *         F = G / (DS + DH)
 *
 *         F  : the frequency on the nand chip pins.
 *         G  : the GPMI clock, such as 100MHz.
 *         DS : GPMI_HW_GPMI_TIMING0:DATA_SETUP
 *         DH : GPMI_HW_GPMI_TIMING0:DATA_HOLD
 *
 * <3> Thirdly, when the frequency on the nand chip pins is above 33MHz,
 *     the nand EDO (extended Data Out) timing could be applied.
 *     The GPMI implements a feedback read strobe to sample the read data.
 *     The feedback read strobe can be delayed to support the nand EDO timing
 *     where the read strobe may deassert before the read data is valid, and
 *     read data is valid for some time after read strobe.
 *
 *   The following figure illustrates some aspects of a NAND Flash read:
 *
 *                   |<---tREA---->|
 *                   |             |
 *                   |         |   |
 *                   |<--tRP-->|   |
 *                   |         |   |
 *                  __          ___|__________________________________
 *     RDN            \________/   |
 *                                 |
 *                                 /---------\
 *     Read Data    --------------<           >---------
 *                                 \---------/
 *                                |     |
 *                                |<-D->|
 *     FeedbackRDN  ________             ____________
 *                          \___________/
 *
 *          D stands for delay, set in the HW_GPMI_CTRL1:RDN_DELAY.
 *
 *
 * <4> Now, we begin to describe how to compute the right RDN_DELAY.
 *
 *  4.1) From the aspect of the nand chip pins:
 *        Delay = (tREA + C - tRP)               {1}
 *
 *        tREA : the maximum read access time. From the ONFI nand standards,
 *               we know that tREA is 16ns in mode 5, tREA is 20ns in mode 4.
 *               Please check it in : www.onfi.org
 *        C    : a constant to adjust the delay. The default is 4.
 *        tRP  : the read pulse width.
 *               Specified by the HW_GPMI_TIMING0:DATA_SETUP:
 *                    tRP = (GPMI-clock-period) * DATA_SETUP
 *
 *  4.2) From the aspect of the GPMI nand controller:
 *         Delay = RDN_DELAY * 0.125 * RP        {2}
 *
 *         RP   : the DLL reference period.
 *            if (GPMI-clock-period > DLL_THRESHOLD)
 *                   RP = GPMI-clock-period / 2;
 *            else
 *                   RP = GPMI-clock-period;
 *
 *            Set the HW_GPMI_CTRL1:HALF_PERIOD if GPMI-clock-period
 *            is greater than DLL_THRESHOLD. In other SOCs, the DLL_THRESHOLD
 *            is 16ns, but in mx6q, we use 12ns.
 *
 *  4.3) since {1} equals {2}, we get:
 *
 *                    (tREA + 4 - tRP) * 8
 *         RDN_DELAY = ---------------------     {3}
 *                           RP
 *
 *  4.4) We only support the fastest asynchronous mode of ONFI nand.
 *       For some ONFI nand, the mode 4 is the fastest mode;
 *       while for some ONFI nand, the mode 5 is the fastest mode.
 *       So we only support the mode 4 and mode 5. There is no need to
 *       support other modes.
857 */ 858static void gpmi_compute_edo_timing(struct gpmi_nand_data *this, 859 struct gpmi_nfc_hardware_timing *hw) 860{ 861 struct resources *r = &this->resources; 862 unsigned long rate = clk_get_rate(r->clock[0]); 863 int mode = this->timing_mode; 864 int dll_threshold = this->devdata->max_chain_delay; 865 unsigned long delay; 866 unsigned long clk_period; 867 int t_rea; 868 int c = 4; 869 int t_rp; 870 int rp; 871 872 /* 873 * [1] for GPMI_HW_GPMI_TIMING0: 874 * The async mode requires 40MHz for mode 4, 50MHz for mode 5. 875 * The GPMI can support 100MHz at most. So if we want to 876 * get the 40MHz or 50MHz, we have to set DS=1, DH=1. 877 * Set the ADDRESS_SETUP to 0 in mode 4. 878 */ 879 hw->data_setup_in_cycles = 1; 880 hw->data_hold_in_cycles = 1; 881 hw->address_setup_in_cycles = ((mode == 5) ? 1 : 0); 882 883 /* [2] for GPMI_HW_GPMI_TIMING1 */ 884 hw->device_busy_timeout = 0x9000; 885 886 /* [3] for GPMI_HW_GPMI_CTRL1 */ 887 hw->wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY; 888 889 /* 890 * Enlarge 10 times for the numerator and denominator in {3}. 891 * This make us to get more accurate result. 892 */ 893 clk_period = NSEC_PER_SEC / (rate / 10); 894 dll_threshold *= 10; 895 t_rea = ((mode == 5) ? 16 : 20) * 10; 896 c *= 10; 897 898 t_rp = clk_period * 1; /* DATA_SETUP is 1 */ 899 900 if (clk_period > dll_threshold) { 901 hw->use_half_periods = 1; 902 rp = clk_period / 2; 903 } else { 904 hw->use_half_periods = 0; 905 rp = clk_period; 906 } 907 908 /* 909 * Multiply the numerator with 10, we could do a round off: 910 * 7.8 round up to 8; 7.4 round down to 7. 
911 */ 912 delay = (((t_rea + c - t_rp) * 8) * 10) / rp; 913 delay = (delay + 5) / 10; 914 915 hw->sample_delay_factor = delay; 916} 917 918static int enable_edo_mode(struct gpmi_nand_data *this, int mode) 919{ 920 struct resources *r = &this->resources; 921 struct nand_chip *nand = &this->nand; 922 struct mtd_info *mtd = &this->mtd; 923 uint8_t *feature; 924 unsigned long rate; 925 int ret; 926 927 feature = kzalloc(ONFI_SUBFEATURE_PARAM_LEN, GFP_KERNEL); 928 if (!feature) 929 return -ENOMEM; 930 931 nand->select_chip(mtd, 0); 932 933 /* [1] send SET FEATURE commond to NAND */ 934 feature[0] = mode; 935 ret = nand->onfi_set_features(mtd, nand, 936 ONFI_FEATURE_ADDR_TIMING_MODE, feature); 937 if (ret) 938 goto err_out; 939 940 /* [2] send GET FEATURE command to double-check the timing mode */ 941 memset(feature, 0, ONFI_SUBFEATURE_PARAM_LEN); 942 ret = nand->onfi_get_features(mtd, nand, 943 ONFI_FEATURE_ADDR_TIMING_MODE, feature); 944 if (ret || feature[0] != mode) 945 goto err_out; 946 947 nand->select_chip(mtd, -1); 948 949 /* [3] set the main IO clock, 100MHz for mode 5, 80MHz for mode 4. */ 950 rate = (mode == 5) ? 100000000 : 80000000; 951 clk_set_rate(r->clock[0], rate); 952 953 /* Let the gpmi_begin() re-compute the timing again. */ 954 this->flags &= ~GPMI_TIMING_INIT_OK; 955 956 this->flags |= GPMI_ASYNC_EDO_ENABLED; 957 this->timing_mode = mode; 958 kfree(feature); 959 dev_info(this->dev, "enable the asynchronous EDO mode %d\n", mode); 960 return 0; 961 962err_out: 963 nand->select_chip(mtd, -1); 964 kfree(feature); 965 dev_err(this->dev, "mode:%d ,failed in set feature.\n", mode); 966 return -EINVAL; 967} 968 969int gpmi_extra_init(struct gpmi_nand_data *this) 970{ 971 struct nand_chip *chip = &this->nand; 972 973 /* Enable the asynchronous EDO feature. */ 974 if (GPMI_IS_MX6(this) && chip->onfi_version) { 975 int mode = onfi_get_async_timing_mode(chip); 976 977 /* We only support the timing mode 4 and mode 5. 
*/ 978 if (mode & ONFI_TIMING_MODE_5) 979 mode = 5; 980 else if (mode & ONFI_TIMING_MODE_4) 981 mode = 4; 982 else 983 return 0; 984 985 return enable_edo_mode(this, mode); 986 } 987 return 0; 988} 989 990/* Begin the I/O */ 991void gpmi_begin(struct gpmi_nand_data *this) 992{ 993 struct resources *r = &this->resources; 994 void __iomem *gpmi_regs = r->gpmi_regs; 995 unsigned int clock_period_in_ns; 996 uint32_t reg; 997 unsigned int dll_wait_time_in_us; 998 struct gpmi_nfc_hardware_timing hw; 999 int ret; 1000 1001 /* Enable the clock. */ 1002 ret = gpmi_enable_clk(this); 1003 if (ret) { 1004 dev_err(this->dev, "We failed in enable the clk\n"); 1005 goto err_out; 1006 } 1007 1008 /* Only initialize the timing once */ 1009 if (this->flags & GPMI_TIMING_INIT_OK) 1010 return; 1011 this->flags |= GPMI_TIMING_INIT_OK; 1012 1013 if (this->flags & GPMI_ASYNC_EDO_ENABLED) 1014 gpmi_compute_edo_timing(this, &hw); 1015 else 1016 gpmi_nfc_compute_hardware_timing(this, &hw); 1017 1018 /* [1] Set HW_GPMI_TIMING0 */ 1019 reg = BF_GPMI_TIMING0_ADDRESS_SETUP(hw.address_setup_in_cycles) | 1020 BF_GPMI_TIMING0_DATA_HOLD(hw.data_hold_in_cycles) | 1021 BF_GPMI_TIMING0_DATA_SETUP(hw.data_setup_in_cycles); 1022 1023 writel(reg, gpmi_regs + HW_GPMI_TIMING0); 1024 1025 /* [2] Set HW_GPMI_TIMING1 */ 1026 writel(BF_GPMI_TIMING1_BUSY_TIMEOUT(hw.device_busy_timeout), 1027 gpmi_regs + HW_GPMI_TIMING1); 1028 1029 /* [3] The following code is to set the HW_GPMI_CTRL1. */ 1030 1031 /* Set the WRN_DLY_SEL */ 1032 writel(BM_GPMI_CTRL1_WRN_DLY_SEL, gpmi_regs + HW_GPMI_CTRL1_CLR); 1033 writel(BF_GPMI_CTRL1_WRN_DLY_SEL(hw.wrn_dly_sel), 1034 gpmi_regs + HW_GPMI_CTRL1_SET); 1035 1036 /* DLL_ENABLE must be set to 0 when setting RDN_DELAY or HALF_PERIOD. */ 1037 writel(BM_GPMI_CTRL1_DLL_ENABLE, gpmi_regs + HW_GPMI_CTRL1_CLR); 1038 1039 /* Clear out the DLL control fields. 
*/ 1040 reg = BM_GPMI_CTRL1_RDN_DELAY | BM_GPMI_CTRL1_HALF_PERIOD; 1041 writel(reg, gpmi_regs + HW_GPMI_CTRL1_CLR); 1042 1043 /* If no sample delay is called for, return immediately. */ 1044 if (!hw.sample_delay_factor) 1045 return; 1046 1047 /* Set RDN_DELAY or HALF_PERIOD. */ 1048 reg = ((hw.use_half_periods) ? BM_GPMI_CTRL1_HALF_PERIOD : 0) 1049 | BF_GPMI_CTRL1_RDN_DELAY(hw.sample_delay_factor); 1050 1051 writel(reg, gpmi_regs + HW_GPMI_CTRL1_SET); 1052 1053 /* At last, we enable the DLL. */ 1054 writel(BM_GPMI_CTRL1_DLL_ENABLE, gpmi_regs + HW_GPMI_CTRL1_SET); 1055 1056 /* 1057 * After we enable the GPMI DLL, we have to wait 64 clock cycles before 1058 * we can use the GPMI. Calculate the amount of time we need to wait, 1059 * in microseconds. 1060 */ 1061 clock_period_in_ns = NSEC_PER_SEC / clk_get_rate(r->clock[0]); 1062 dll_wait_time_in_us = (clock_period_in_ns * 64) / 1000; 1063 1064 if (!dll_wait_time_in_us) 1065 dll_wait_time_in_us = 1; 1066 1067 /* Wait for the DLL to settle. */ 1068 udelay(dll_wait_time_in_us); 1069 1070err_out: 1071 return; 1072} 1073 1074void gpmi_end(struct gpmi_nand_data *this) 1075{ 1076 gpmi_disable_clk(this); 1077} 1078 1079/* Clears a BCH interrupt. */ 1080void gpmi_clear_bch(struct gpmi_nand_data *this) 1081{ 1082 struct resources *r = &this->resources; 1083 writel(BM_BCH_CTRL_COMPLETE_IRQ, r->bch_regs + HW_BCH_CTRL_CLR); 1084} 1085 1086/* Returns the Ready/Busy status of the given chip. */ 1087int gpmi_is_ready(struct gpmi_nand_data *this, unsigned chip) 1088{ 1089 struct resources *r = &this->resources; 1090 uint32_t mask = 0; 1091 uint32_t reg = 0; 1092 1093 if (GPMI_IS_MX23(this)) { 1094 mask = MX23_BM_GPMI_DEBUG_READY0 << chip; 1095 reg = readl(r->gpmi_regs + HW_GPMI_DEBUG); 1096 } else if (GPMI_IS_MX28(this) || GPMI_IS_MX6(this)) { 1097 /* 1098 * In the imx6, all the ready/busy pins are bound 1099 * together. So we only need to check chip 0. 
1100 */ 1101 if (GPMI_IS_MX6(this)) 1102 chip = 0; 1103 1104 /* MX28 shares the same R/B register as MX6Q. */ 1105 mask = MX28_BF_GPMI_STAT_READY_BUSY(1 << chip); 1106 reg = readl(r->gpmi_regs + HW_GPMI_STAT); 1107 } else 1108 dev_err(this->dev, "unknown arch.\n"); 1109 return reg & mask; 1110} 1111 1112static inline void set_dma_type(struct gpmi_nand_data *this, 1113 enum dma_ops_type type) 1114{ 1115 this->last_dma_type = this->dma_type; 1116 this->dma_type = type; 1117} 1118 1119int gpmi_send_command(struct gpmi_nand_data *this) 1120{ 1121 struct dma_chan *channel = get_dma_chan(this); 1122 struct dma_async_tx_descriptor *desc; 1123 struct scatterlist *sgl; 1124 int chip = this->current_chip; 1125 u32 pio[3]; 1126 1127 /* [1] send out the PIO words */ 1128 pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(BV_GPMI_CTRL0_COMMAND_MODE__WRITE) 1129 | BM_GPMI_CTRL0_WORD_LENGTH 1130 | BF_GPMI_CTRL0_CS(chip, this) 1131 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) 1132 | BF_GPMI_CTRL0_ADDRESS(BV_GPMI_CTRL0_ADDRESS__NAND_CLE) 1133 | BM_GPMI_CTRL0_ADDRESS_INCREMENT 1134 | BF_GPMI_CTRL0_XFER_COUNT(this->command_length); 1135 pio[1] = pio[2] = 0; 1136 desc = dmaengine_prep_slave_sg(channel, 1137 (struct scatterlist *)pio, 1138 ARRAY_SIZE(pio), DMA_TRANS_NONE, 0); 1139 if (!desc) 1140 return -EINVAL; 1141 1142 /* [2] send out the COMMAND + ADDRESS string stored in @buffer */ 1143 sgl = &this->cmd_sgl; 1144 1145 sg_init_one(sgl, this->cmd_buffer, this->command_length); 1146 dma_map_sg(this->dev, sgl, 1, DMA_TO_DEVICE); 1147 desc = dmaengine_prep_slave_sg(channel, 1148 sgl, 1, DMA_MEM_TO_DEV, 1149 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1150 if (!desc) 1151 return -EINVAL; 1152 1153 /* [3] submit the DMA */ 1154 set_dma_type(this, DMA_FOR_COMMAND); 1155 return start_dma_without_bch_irq(this, desc); 1156} 1157 1158int gpmi_send_data(struct gpmi_nand_data *this) 1159{ 1160 struct dma_async_tx_descriptor *desc; 1161 struct dma_chan *channel = get_dma_chan(this); 1162 int chip = 
this->current_chip; 1163 uint32_t command_mode; 1164 uint32_t address; 1165 u32 pio[2]; 1166 1167 /* [1] PIO */ 1168 command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WRITE; 1169 address = BV_GPMI_CTRL0_ADDRESS__NAND_DATA; 1170 1171 pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode) 1172 | BM_GPMI_CTRL0_WORD_LENGTH 1173 | BF_GPMI_CTRL0_CS(chip, this) 1174 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) 1175 | BF_GPMI_CTRL0_ADDRESS(address) 1176 | BF_GPMI_CTRL0_XFER_COUNT(this->upper_len); 1177 pio[1] = 0; 1178 desc = dmaengine_prep_slave_sg(channel, (struct scatterlist *)pio, 1179 ARRAY_SIZE(pio), DMA_TRANS_NONE, 0); 1180 if (!desc) 1181 return -EINVAL; 1182 1183 /* [2] send DMA request */ 1184 prepare_data_dma(this, DMA_TO_DEVICE); 1185 desc = dmaengine_prep_slave_sg(channel, &this->data_sgl, 1186 1, DMA_MEM_TO_DEV, 1187 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1188 if (!desc) 1189 return -EINVAL; 1190 1191 /* [3] submit the DMA */ 1192 set_dma_type(this, DMA_FOR_WRITE_DATA); 1193 return start_dma_without_bch_irq(this, desc); 1194} 1195 1196int gpmi_read_data(struct gpmi_nand_data *this) 1197{ 1198 struct dma_async_tx_descriptor *desc; 1199 struct dma_chan *channel = get_dma_chan(this); 1200 int chip = this->current_chip; 1201 u32 pio[2]; 1202 1203 /* [1] : send PIO */ 1204 pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(BV_GPMI_CTRL0_COMMAND_MODE__READ) 1205 | BM_GPMI_CTRL0_WORD_LENGTH 1206 | BF_GPMI_CTRL0_CS(chip, this) 1207 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) 1208 | BF_GPMI_CTRL0_ADDRESS(BV_GPMI_CTRL0_ADDRESS__NAND_DATA) 1209 | BF_GPMI_CTRL0_XFER_COUNT(this->upper_len); 1210 pio[1] = 0; 1211 desc = dmaengine_prep_slave_sg(channel, 1212 (struct scatterlist *)pio, 1213 ARRAY_SIZE(pio), DMA_TRANS_NONE, 0); 1214 if (!desc) 1215 return -EINVAL; 1216 1217 /* [2] : send DMA request */ 1218 prepare_data_dma(this, DMA_FROM_DEVICE); 1219 desc = dmaengine_prep_slave_sg(channel, &this->data_sgl, 1220 1, DMA_DEV_TO_MEM, 1221 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1222 if (!desc) 1223 return 
-EINVAL; 1224 1225 /* [3] : submit the DMA */ 1226 set_dma_type(this, DMA_FOR_READ_DATA); 1227 return start_dma_without_bch_irq(this, desc); 1228} 1229 1230int gpmi_send_page(struct gpmi_nand_data *this, 1231 dma_addr_t payload, dma_addr_t auxiliary) 1232{ 1233 struct bch_geometry *geo = &this->bch_geometry; 1234 uint32_t command_mode; 1235 uint32_t address; 1236 uint32_t ecc_command; 1237 uint32_t buffer_mask; 1238 struct dma_async_tx_descriptor *desc; 1239 struct dma_chan *channel = get_dma_chan(this); 1240 int chip = this->current_chip; 1241 u32 pio[6]; 1242 1243 /* A DMA descriptor that does an ECC page read. */ 1244 command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WRITE; 1245 address = BV_GPMI_CTRL0_ADDRESS__NAND_DATA; 1246 ecc_command = BV_GPMI_ECCCTRL_ECC_CMD__BCH_ENCODE; 1247 buffer_mask = BV_GPMI_ECCCTRL_BUFFER_MASK__BCH_PAGE | 1248 BV_GPMI_ECCCTRL_BUFFER_MASK__BCH_AUXONLY; 1249 1250 pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode) 1251 | BM_GPMI_CTRL0_WORD_LENGTH 1252 | BF_GPMI_CTRL0_CS(chip, this) 1253 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) 1254 | BF_GPMI_CTRL0_ADDRESS(address) 1255 | BF_GPMI_CTRL0_XFER_COUNT(0); 1256 pio[1] = 0; 1257 pio[2] = BM_GPMI_ECCCTRL_ENABLE_ECC 1258 | BF_GPMI_ECCCTRL_ECC_CMD(ecc_command) 1259 | BF_GPMI_ECCCTRL_BUFFER_MASK(buffer_mask); 1260 pio[3] = geo->page_size; 1261 pio[4] = payload; 1262 pio[5] = auxiliary; 1263 1264 desc = dmaengine_prep_slave_sg(channel, 1265 (struct scatterlist *)pio, 1266 ARRAY_SIZE(pio), DMA_TRANS_NONE, 1267 DMA_CTRL_ACK); 1268 if (!desc) 1269 return -EINVAL; 1270 1271 set_dma_type(this, DMA_FOR_WRITE_ECC_PAGE); 1272 return start_dma_with_bch_irq(this, desc); 1273} 1274 1275int gpmi_read_page(struct gpmi_nand_data *this, 1276 dma_addr_t payload, dma_addr_t auxiliary) 1277{ 1278 struct bch_geometry *geo = &this->bch_geometry; 1279 uint32_t command_mode; 1280 uint32_t address; 1281 uint32_t ecc_command; 1282 uint32_t buffer_mask; 1283 struct dma_async_tx_descriptor *desc; 1284 struct dma_chan *channel 
= get_dma_chan(this); 1285 int chip = this->current_chip; 1286 u32 pio[6]; 1287 1288 /* [1] Wait for the chip to report ready. */ 1289 command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WAIT_FOR_READY; 1290 address = BV_GPMI_CTRL0_ADDRESS__NAND_DATA; 1291 1292 pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode) 1293 | BM_GPMI_CTRL0_WORD_LENGTH 1294 | BF_GPMI_CTRL0_CS(chip, this) 1295 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) 1296 | BF_GPMI_CTRL0_ADDRESS(address) 1297 | BF_GPMI_CTRL0_XFER_COUNT(0); 1298 pio[1] = 0; 1299 desc = dmaengine_prep_slave_sg(channel, 1300 (struct scatterlist *)pio, 2, 1301 DMA_TRANS_NONE, 0); 1302 if (!desc) 1303 return -EINVAL; 1304 1305 /* [2] Enable the BCH block and read. */ 1306 command_mode = BV_GPMI_CTRL0_COMMAND_MODE__READ; 1307 address = BV_GPMI_CTRL0_ADDRESS__NAND_DATA; 1308 ecc_command = BV_GPMI_ECCCTRL_ECC_CMD__BCH_DECODE; 1309 buffer_mask = BV_GPMI_ECCCTRL_BUFFER_MASK__BCH_PAGE 1310 | BV_GPMI_ECCCTRL_BUFFER_MASK__BCH_AUXONLY; 1311 1312 pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode) 1313 | BM_GPMI_CTRL0_WORD_LENGTH 1314 | BF_GPMI_CTRL0_CS(chip, this) 1315 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, this) 1316 | BF_GPMI_CTRL0_ADDRESS(address) 1317 | BF_GPMI_CTRL0_XFER_COUNT(geo->page_size); 1318 1319 pio[1] = 0; 1320 pio[2] = BM_GPMI_ECCCTRL_ENABLE_ECC 1321 | BF_GPMI_ECCCTRL_ECC_CMD(ecc_command) 1322 | BF_GPMI_ECCCTRL_BUFFER_MASK(buffer_mask); 1323 pio[3] = geo->page_size; 1324 pio[4] = payload; 1325 pio[5] = auxiliary; 1326 desc = dmaengine_prep_slave_sg(channel, 1327 (struct scatterlist *)pio, 1328 ARRAY_SIZE(pio), DMA_TRANS_NONE, 1329 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1330 if (!desc) 1331 return -EINVAL; 1332 1333 /* [3] Disable the BCH block */ 1334 command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WAIT_FOR_READY; 1335 address = BV_GPMI_CTRL0_ADDRESS__NAND_DATA; 1336 1337 pio[0] = BF_GPMI_CTRL0_COMMAND_MODE(command_mode) 1338 | BM_GPMI_CTRL0_WORD_LENGTH 1339 | BF_GPMI_CTRL0_CS(chip, this) 1340 | BF_GPMI_CTRL0_LOCK_CS(LOCK_CS_ENABLE, 
this) 1341 | BF_GPMI_CTRL0_ADDRESS(address) 1342 | BF_GPMI_CTRL0_XFER_COUNT(geo->page_size); 1343 pio[1] = 0; 1344 pio[2] = 0; /* clear GPMI_HW_GPMI_ECCCTRL, disable the BCH. */ 1345 desc = dmaengine_prep_slave_sg(channel, 1346 (struct scatterlist *)pio, 3, 1347 DMA_TRANS_NONE, 1348 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1349 if (!desc) 1350 return -EINVAL; 1351 1352 /* [4] submit the DMA */ 1353 set_dma_type(this, DMA_FOR_READ_ECC_PAGE); 1354 return start_dma_with_bch_irq(this, desc); 1355} 1356 1357/** 1358 * gpmi_copy_bits - copy bits from one memory region to another 1359 * @dst: destination buffer 1360 * @dst_bit_off: bit offset we're starting to write at 1361 * @src: source buffer 1362 * @src_bit_off: bit offset we're starting to read from 1363 * @nbits: number of bits to copy 1364 * 1365 * This functions copies bits from one memory region to another, and is used by 1366 * the GPMI driver to copy ECC sections which are not guaranteed to be byte 1367 * aligned. 1368 * 1369 * src and dst should not overlap. 1370 * 1371 */ 1372void gpmi_copy_bits(u8 *dst, size_t dst_bit_off, 1373 const u8 *src, size_t src_bit_off, 1374 size_t nbits) 1375{ 1376 size_t i; 1377 size_t nbytes; 1378 u32 src_buffer = 0; 1379 size_t bits_in_src_buffer = 0; 1380 1381 if (!nbits) 1382 return; 1383 1384 /* 1385 * Move src and dst pointers to the closest byte pointer and store bit 1386 * offsets within a byte. 1387 */ 1388 src += src_bit_off / 8; 1389 src_bit_off %= 8; 1390 1391 dst += dst_bit_off / 8; 1392 dst_bit_off %= 8; 1393 1394 /* 1395 * Initialize the src_buffer value with bits available in the first 1396 * byte of data so that we end up with a byte aligned src pointer. 
1397 */ 1398 if (src_bit_off) { 1399 src_buffer = src[0] >> src_bit_off; 1400 if (nbits >= (8 - src_bit_off)) { 1401 bits_in_src_buffer += 8 - src_bit_off; 1402 } else { 1403 src_buffer &= GENMASK(nbits - 1, 0); 1404 bits_in_src_buffer += nbits; 1405 } 1406 nbits -= bits_in_src_buffer; 1407 src++; 1408 } 1409 1410 /* Calculate the number of bytes that can be copied from src to dst. */ 1411 nbytes = nbits / 8; 1412 1413 /* Try to align dst to a byte boundary. */ 1414 if (dst_bit_off) { 1415 if (bits_in_src_buffer < (8 - dst_bit_off) && nbytes) { 1416 src_buffer |= src[0] << bits_in_src_buffer; 1417 bits_in_src_buffer += 8; 1418 src++; 1419 nbytes--; 1420 } 1421 1422 if (bits_in_src_buffer >= (8 - dst_bit_off)) { 1423 dst[0] &= GENMASK(dst_bit_off - 1, 0); 1424 dst[0] |= src_buffer << dst_bit_off; 1425 src_buffer >>= (8 - dst_bit_off); 1426 bits_in_src_buffer -= (8 - dst_bit_off); 1427 dst_bit_off = 0; 1428 dst++; 1429 if (bits_in_src_buffer > 7) { 1430 bits_in_src_buffer -= 8; 1431 dst[0] = src_buffer; 1432 dst++; 1433 src_buffer >>= 8; 1434 } 1435 } 1436 } 1437 1438 if (!bits_in_src_buffer && !dst_bit_off) { 1439 /* 1440 * Both src and dst pointers are byte aligned, thus we can 1441 * just use the optimized memcpy function. 1442 */ 1443 if (nbytes) 1444 memcpy(dst, src, nbytes); 1445 } else { 1446 /* 1447 * src buffer is not byte aligned, hence we have to copy each 1448 * src byte to the src_buffer variable before extracting a byte 1449 * to store in dst. 1450 */ 1451 for (i = 0; i < nbytes; i++) { 1452 src_buffer |= src[i] << bits_in_src_buffer; 1453 dst[i] = src_buffer; 1454 src_buffer >>= 8; 1455 } 1456 } 1457 /* Update dst and src pointers */ 1458 dst += nbytes; 1459 src += nbytes; 1460 1461 /* 1462 * nbits is the number of remaining bits. It should not exceed 8 as 1463 * we've already copied as much bytes as possible. 
1464 */ 1465 nbits %= 8; 1466 1467 /* 1468 * If there's no more bits to copy to the destination and src buffer 1469 * was already byte aligned, then we're done. 1470 */ 1471 if (!nbits && !bits_in_src_buffer) 1472 return; 1473 1474 /* Copy the remaining bits to src_buffer */ 1475 if (nbits) 1476 src_buffer |= (*src & GENMASK(nbits - 1, 0)) << 1477 bits_in_src_buffer; 1478 bits_in_src_buffer += nbits; 1479 1480 /* 1481 * In case there were not enough bits to get a byte aligned dst buffer 1482 * prepare the src_buffer variable to match the dst organization (shift 1483 * src_buffer by dst_bit_off and retrieve the least significant bits 1484 * from dst). 1485 */ 1486 if (dst_bit_off) 1487 src_buffer = (src_buffer << dst_bit_off) | 1488 (*dst & GENMASK(dst_bit_off - 1, 0)); 1489 bits_in_src_buffer += dst_bit_off; 1490 1491 /* 1492 * Keep most significant bits from dst if we end up with an unaligned 1493 * number of bits. 1494 */ 1495 nbytes = bits_in_src_buffer / 8; 1496 if (bits_in_src_buffer % 8) { 1497 src_buffer |= (dst[nbytes] & 1498 GENMASK(7, bits_in_src_buffer % 8)) << 1499 (nbytes * 8); 1500 nbytes++; 1501 } 1502 1503 /* Copy the remaining bytes to dst */ 1504 for (i = 0; i < nbytes; i++) { 1505 dst[i] = src_buffer; 1506 src_buffer >>= 8; 1507 } 1508} 1509