1/* 2 * Low-Level PCI Express Support for the SH7786 3 * 4 * Copyright (C) 2009 - 2011 Paul Mundt 5 * 6 * This file is subject to the terms and conditions of the GNU General Public 7 * License. See the file "COPYING" in the main directory of this archive 8 * for more details. 9 */ 10#define pr_fmt(fmt) "PCI: " fmt 11 12#include <linux/pci.h> 13#include <linux/init.h> 14#include <linux/kernel.h> 15#include <linux/io.h> 16#include <linux/async.h> 17#include <linux/delay.h> 18#include <linux/slab.h> 19#include <linux/clk.h> 20#include <linux/sh_clk.h> 21#include <linux/sh_intc.h> 22#include "pcie-sh7786.h" 23#include <asm/sizes.h> 24 25struct sh7786_pcie_port { 26 struct pci_channel *hose; 27 struct clk *fclk, phy_clk; 28 unsigned int index; 29 int endpoint; 30 int link; 31}; 32 33static struct sh7786_pcie_port *sh7786_pcie_ports; 34static unsigned int nr_ports; 35 36static struct sh7786_pcie_hwops { 37 int (*core_init)(void); 38 async_func_t port_init_hw; 39} *sh7786_pcie_hwops; 40 41static struct resource sh7786_pci0_resources[] = { 42 { 43 .name = "PCIe0 IO", 44 .start = 0xfd000000, 45 .end = 0xfd000000 + SZ_8M - 1, 46 .flags = IORESOURCE_IO, 47 }, { 48 .name = "PCIe0 MEM 0", 49 .start = 0xc0000000, 50 .end = 0xc0000000 + SZ_512M - 1, 51 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 52 }, { 53 .name = "PCIe0 MEM 1", 54 .start = 0x10000000, 55 .end = 0x10000000 + SZ_64M - 1, 56 .flags = IORESOURCE_MEM, 57 }, { 58 .name = "PCIe0 MEM 2", 59 .start = 0xfe100000, 60 .end = 0xfe100000 + SZ_1M - 1, 61 .flags = IORESOURCE_MEM, 62 }, 63}; 64 65static struct resource sh7786_pci1_resources[] = { 66 { 67 .name = "PCIe1 IO", 68 .start = 0xfd800000, 69 .end = 0xfd800000 + SZ_8M - 1, 70 .flags = IORESOURCE_IO, 71 }, { 72 .name = "PCIe1 MEM 0", 73 .start = 0xa0000000, 74 .end = 0xa0000000 + SZ_512M - 1, 75 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 76 }, { 77 .name = "PCIe1 MEM 1", 78 .start = 0x30000000, 79 .end = 0x30000000 + SZ_256M - 1, 80 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 81 }, { 82 .name = "PCIe1 MEM 2", 83 .start = 0xfe300000, 84 .end = 0xfe300000 + SZ_1M - 1, 85 .flags = IORESOURCE_MEM, 86 }, 87}; 88 89static struct resource sh7786_pci2_resources[] = { 90 { 91 .name = "PCIe2 IO", 92 .start = 0xfc800000, 93 .end = 0xfc800000 + SZ_4M - 1, 94 .flags = IORESOURCE_IO, 95 }, { 96 .name = "PCIe2 MEM 0", 97 .start = 0x80000000, 98 .end = 0x80000000 + SZ_512M - 1, 99 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 100 }, { 101 .name = "PCIe2 MEM 1", 102 .start = 0x20000000, 103 .end = 0x20000000 + SZ_256M - 1, 104 .flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT, 105 }, { 106 .name = "PCIe2 MEM 2", 107 .start = 0xfcd00000, 108 .end = 0xfcd00000 + SZ_1M - 1, 109 .flags = IORESOURCE_MEM, 110 }, 111}; 112 113extern struct pci_ops sh7786_pci_ops; 114 115#define DEFINE_CONTROLLER(start, idx) \ 116{ \ 117 .pci_ops = &sh7786_pci_ops, \ 118 .resources = sh7786_pci##idx##_resources, \ 119 .nr_resources = ARRAY_SIZE(sh7786_pci##idx##_resources), \ 120 .reg_base = start, \ 121 .mem_offset = 0, \ 122 .io_offset = 0, \ 123} 124 125static struct pci_channel sh7786_pci_channels[] = { 126 DEFINE_CONTROLLER(0xfe000000, 0), 127 DEFINE_CONTROLLER(0xfe200000, 1), 128 DEFINE_CONTROLLER(0xfcc00000, 2), 129}; 130 131static struct clk fixed_pciexclkp = { 132 .rate = 100000000, /* 100 MHz reference clock */ 133}; 134 135static void sh7786_pci_fixup(struct pci_dev *dev) 136{ 137 /* 138 * Prevent enumeration of root complex resources. 139 */ 140 if (pci_is_root_bus(dev->bus) && dev->devfn == 0) { 141 int i; 142 143 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 144 dev->resource[i].start = 0; 145 dev->resource[i].end = 0; 146 dev->resource[i].flags = 0; 147 } 148 } 149} 150DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_SH7786, 151 sh7786_pci_fixup); 152 153static int __init phy_wait_for_ack(struct pci_channel *chan) 154{ 155 unsigned int timeout = 100; 156 157 while (timeout--) { 158 if (pci_read_reg(chan, SH4A_PCIEPHYADRR) & (1 << BITS_ACK)) 159 return 0; 160 161 udelay(100); 162 } 163 164 return -ETIMEDOUT; 165} 166 167static int __init pci_wait_for_irq(struct pci_channel *chan, unsigned int mask) 168{ 169 unsigned int timeout = 100; 170 171 while (timeout--) { 172 if ((pci_read_reg(chan, SH4A_PCIEINTR) & mask) == mask) 173 return 0; 174 175 udelay(100); 176 } 177 178 return -ETIMEDOUT; 179} 180 181static void __init phy_write_reg(struct pci_channel *chan, unsigned int addr, 182 unsigned int lane, unsigned int data) 183{ 184 unsigned long phyaddr; 185 186 phyaddr = (1 << BITS_CMD) + ((lane & 0xf) << BITS_LANE) + 187 ((addr & 0xff) << BITS_ADR); 188 189 /* Set write data */ 190 pci_write_reg(chan, data, SH4A_PCIEPHYDOUTR); 191 pci_write_reg(chan, phyaddr, SH4A_PCIEPHYADRR); 192 193 phy_wait_for_ack(chan); 194 195 /* Clear command */ 196 pci_write_reg(chan, 0, SH4A_PCIEPHYDOUTR); 197 pci_write_reg(chan, 0, SH4A_PCIEPHYADRR); 198 199 phy_wait_for_ack(chan); 200} 201 202static int __init pcie_clk_init(struct sh7786_pcie_port *port) 203{ 204 struct pci_channel *chan = port->hose; 205 struct clk *clk; 206 char fclk_name[16]; 207 int ret; 208 209 /* 210 * First register the fixed clock 211 */ 212 ret = clk_register(&fixed_pciexclkp); 213 if (unlikely(ret != 0)) 214 return ret; 215 216 /* 217 * Grab the port's function clock, which the PHY clock depends 218 * on. clock lookups don't help us much at this point, since no 219 * dev_id is available this early. Lame. 220 */ 221 snprintf(fclk_name, sizeof(fclk_name), "pcie%d_fck", port->index); 222 223 port->fclk = clk_get(NULL, fclk_name); 224 if (IS_ERR(port->fclk)) { 225 ret = PTR_ERR(port->fclk); 226 goto err_fclk; 227 } 228 229 clk_enable(port->fclk); 230 231 /* 232 * And now, set up the PHY clock 233 */ 234 clk = &port->phy_clk; 235 236 memset(clk, 0, sizeof(struct clk)); 237 238 clk->parent = &fixed_pciexclkp; 239 clk->enable_reg = (void __iomem *)(chan->reg_base + SH4A_PCIEPHYCTLR); 240 clk->enable_bit = BITS_CKE; 241 242 ret = sh_clk_mstp_register(clk, 1); 243 if (unlikely(ret < 0)) 244 goto err_phy; 245 246 return 0; 247 248err_phy: 249 clk_disable(port->fclk); 250 clk_put(port->fclk); 251err_fclk: 252 clk_unregister(&fixed_pciexclkp); 253 254 return ret; 255} 256 257static int __init phy_init(struct sh7786_pcie_port *port) 258{ 259 struct pci_channel *chan = port->hose; 260 unsigned int timeout = 100; 261 262 clk_enable(&port->phy_clk); 263 264 /* Initialize the phy */ 265 phy_write_reg(chan, 0x60, 0xf, 0x004b008b); 266 phy_write_reg(chan, 0x61, 0xf, 0x00007b41); 267 phy_write_reg(chan, 0x64, 0xf, 0x00ff4f00); 268 phy_write_reg(chan, 0x65, 0xf, 0x09070907); 269 phy_write_reg(chan, 0x66, 0xf, 0x00000010); 270 phy_write_reg(chan, 0x74, 0xf, 0x0007001c); 271 phy_write_reg(chan, 0x79, 0xf, 0x01fc000d); 272 phy_write_reg(chan, 0xb0, 0xf, 0x00000610); 273 274 /* Deassert Standby */ 275 phy_write_reg(chan, 0x67, 0x1, 0x00000400); 276 277 /* Disable clock */ 278 clk_disable(&port->phy_clk); 279 280 while (timeout--) { 281 if (pci_read_reg(chan, SH4A_PCIEPHYSR)) 282 return 0; 283 284 udelay(100); 285 } 286 287 return -ETIMEDOUT; 288} 289 290static void __init pcie_reset(struct sh7786_pcie_port *port) 291{ 292 struct pci_channel *chan = port->hose; 293 294 pci_write_reg(chan, 1, SH4A_PCIESRSTR); 295 pci_write_reg(chan, 0, SH4A_PCIETCTLR); 296 pci_write_reg(chan, 0, SH4A_PCIESRSTR); 297 pci_write_reg(chan, 0, SH4A_PCIETXVC0SR); 298} 299 300static int __init pcie_init(struct sh7786_pcie_port *port) 301{ 302 struct pci_channel *chan = port->hose; 303 unsigned int data; 304 phys_addr_t memphys; 305 size_t memsize; 306 int ret, i, win; 307 308 /* Begin initialization */ 309 pcie_reset(port); 310 311 /* 312 * Initial header for port config space is type 1, set the device 313 * class to match. Hardware takes care of propagating the IDSETR 314 * settings, so there is no need to bother with a quirk. 315 */ 316 pci_write_reg(chan, PCI_CLASS_BRIDGE_PCI << 16, SH4A_PCIEIDSETR1); 317 318 /* Initialize default capabilities. */ 319 data = pci_read_reg(chan, SH4A_PCIEEXPCAP0); 320 data &= ~(PCI_EXP_FLAGS_TYPE << 16); 321 322 if (port->endpoint) 323 data |= PCI_EXP_TYPE_ENDPOINT << 20; 324 else 325 data |= PCI_EXP_TYPE_ROOT_PORT << 20; 326 327 data |= PCI_CAP_ID_EXP; 328 pci_write_reg(chan, data, SH4A_PCIEEXPCAP0); 329 330 /* Enable data link layer active state reporting */ 331 pci_write_reg(chan, PCI_EXP_LNKCAP_DLLLARC, SH4A_PCIEEXPCAP3); 332 333 /* Enable extended sync and ASPM L0s support */ 334 data = pci_read_reg(chan, SH4A_PCIEEXPCAP4); 335 data &= ~PCI_EXP_LNKCTL_ASPMC; 336 data |= PCI_EXP_LNKCTL_ES | 1; 337 pci_write_reg(chan, data, SH4A_PCIEEXPCAP4); 338 339 /* Write out the physical slot number */ 340 data = pci_read_reg(chan, SH4A_PCIEEXPCAP5); 341 data &= ~PCI_EXP_SLTCAP_PSN; 342 data |= (port->index + 1) << 19; 343 pci_write_reg(chan, data, SH4A_PCIEEXPCAP5); 344 345 /* Set the completion timer timeout to the maximum 32ms. */ 346 data = pci_read_reg(chan, SH4A_PCIETLCTLR); 347 data &= ~0x3f00; 348 data |= 0x32 << 8; 349 pci_write_reg(chan, data, SH4A_PCIETLCTLR); 350 351 /* 352 * Set fast training sequences to the maximum 255, 353 * and enable MAC data scrambling. 354 */ 355 data = pci_read_reg(chan, SH4A_PCIEMACCTLR); 356 data &= ~PCIEMACCTLR_SCR_DIS; 357 data |= (0xff << 16); 358 pci_write_reg(chan, data, SH4A_PCIEMACCTLR); 359 360 memphys = __pa(memory_start); 361 memsize = roundup_pow_of_two(memory_end - memory_start); 362 363 /* 364 * If there's more than 512MB of memory, we need to roll over to 365 * LAR1/LAMR1. 366 */ 367 if (memsize > SZ_512M) { 368 pci_write_reg(chan, memphys + SZ_512M, SH4A_PCIELAR1); 369 pci_write_reg(chan, ((memsize - SZ_512M) - SZ_256) | 1, 370 SH4A_PCIELAMR1); 371 memsize = SZ_512M; 372 } else { 373 /* 374 * Otherwise just zero it out and disable it. 375 */ 376 pci_write_reg(chan, 0, SH4A_PCIELAR1); 377 pci_write_reg(chan, 0, SH4A_PCIELAMR1); 378 } 379 380 /* 381 * LAR0/LAMR0 covers up to the first 512MB, which is enough to 382 * cover all of lowmem on most platforms. 383 */ 384 pci_write_reg(chan, memphys, SH4A_PCIELAR0); 385 pci_write_reg(chan, (memsize - SZ_256) | 1, SH4A_PCIELAMR0); 386 387 /* Finish initialization */ 388 data = pci_read_reg(chan, SH4A_PCIETCTLR); 389 data |= 0x1; 390 pci_write_reg(chan, data, SH4A_PCIETCTLR); 391 392 /* Let things settle down a bit.. */ 393 mdelay(100); 394 395 /* Enable DL_Active Interrupt generation */ 396 data = pci_read_reg(chan, SH4A_PCIEDLINTENR); 397 data |= PCIEDLINTENR_DLL_ACT_ENABLE; 398 pci_write_reg(chan, data, SH4A_PCIEDLINTENR); 399 400 /* Disable MAC data scrambling. */ 401 data = pci_read_reg(chan, SH4A_PCIEMACCTLR); 402 data |= PCIEMACCTLR_SCR_DIS | (0xff << 16); 403 pci_write_reg(chan, data, SH4A_PCIEMACCTLR); 404 405 /* 406 * This will timeout if we don't have a link, but we permit the 407 * port to register anyways in order to support hotplug on future 408 * hardware. 409 */ 410 ret = pci_wait_for_irq(chan, MASK_INT_TX_CTRL); 411 412 data = pci_read_reg(chan, SH4A_PCIEPCICONF1); 413 data &= ~(PCI_STATUS_DEVSEL_MASK << 16); 414 data |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | 415 (PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST) << 16; 416 pci_write_reg(chan, data, SH4A_PCIEPCICONF1); 417 418 pci_write_reg(chan, 0x80888000, SH4A_PCIETXVC0DCTLR); 419 pci_write_reg(chan, 0x00222000, SH4A_PCIERXVC0DCTLR); 420 421 wmb(); 422 423 if (ret == 0) { 424 data = pci_read_reg(chan, SH4A_PCIEMACSR); 425 printk(KERN_NOTICE "PCI: PCIe#%d x%d link detected\n", 426 port->index, (data >> 20) & 0x3f); 427 } else 428 printk(KERN_NOTICE "PCI: PCIe#%d link down\n", 429 port->index); 430 431 for (i = win = 0; i < chan->nr_resources; i++) { 432 struct resource *res = chan->resources + i; 433 resource_size_t size; 434 u32 mask; 435 436 /* 437 * We can't use the 32-bit mode windows in legacy 29-bit 438 * mode, so just skip them entirely. 439 */ 440 if ((res->flags & IORESOURCE_MEM_32BIT) && __in_29bit_mode()) 441 continue; 442 443 pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(win)); 444 445 /* 446 * The PAMR mask is calculated in units of 256kB, which 447 * keeps things pretty simple. 448 */ 449 size = resource_size(res); 450 mask = (roundup_pow_of_two(size) / SZ_256K) - 1; 451 pci_write_reg(chan, mask << 18, SH4A_PCIEPAMR(win)); 452 453 pci_write_reg(chan, upper_32_bits(res->start), 454 SH4A_PCIEPARH(win)); 455 pci_write_reg(chan, lower_32_bits(res->start), 456 SH4A_PCIEPARL(win)); 457 458 mask = MASK_PARE; 459 if (res->flags & IORESOURCE_IO) 460 mask |= MASK_SPC; 461 462 pci_write_reg(chan, mask, SH4A_PCIEPTCTLR(win)); 463 464 win++; 465 } 466 467 return 0; 468} 469 470int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) 471{ 472 return evt2irq(0xae0); 473} 474 475static int __init sh7786_pcie_core_init(void) 476{ 477 /* Return the number of ports */ 478 return test_mode_pin(MODE_PIN12) ? 3 : 2; 479} 480 481static void __init sh7786_pcie_init_hw(void *data, async_cookie_t cookie) 482{ 483 struct sh7786_pcie_port *port = data; 484 int ret; 485 486 /* 487 * Check if we are configured in endpoint or root complex mode, 488 * this is a fixed pin setting that applies to all PCIe ports. 489 */ 490 port->endpoint = test_mode_pin(MODE_PIN11); 491 492 /* 493 * Setup clocks, needed both for PHY and PCIe registers. 494 */ 495 ret = pcie_clk_init(port); 496 if (unlikely(ret < 0)) { 497 pr_err("clock initialization failed for port#%d\n", 498 port->index); 499 return; 500 } 501 502 ret = phy_init(port); 503 if (unlikely(ret < 0)) { 504 pr_err("phy initialization failed for port#%d\n", 505 port->index); 506 return; 507 } 508 509 ret = pcie_init(port); 510 if (unlikely(ret < 0)) { 511 pr_err("core initialization failed for port#%d\n", 512 port->index); 513 return; 514 } 515 516 /* In the interest of preserving device ordering, synchronize */ 517 async_synchronize_cookie(cookie); 518 519 register_pci_controller(port->hose); 520} 521 522static struct sh7786_pcie_hwops sh7786_65nm_pcie_hwops __initdata = { 523 .core_init = sh7786_pcie_core_init, 524 .port_init_hw = sh7786_pcie_init_hw, 525}; 526 527static int __init sh7786_pcie_init(void) 528{ 529 struct clk *platclk; 530 int i; 531 532 printk(KERN_NOTICE "PCI: Starting initialization.\n"); 533 534 sh7786_pcie_hwops = &sh7786_65nm_pcie_hwops; 535 536 nr_ports = sh7786_pcie_hwops->core_init(); 537 BUG_ON(nr_ports > ARRAY_SIZE(sh7786_pci_channels)); 538 539 if (unlikely(nr_ports == 0)) 540 return -ENODEV; 541 542 sh7786_pcie_ports = kzalloc(nr_ports * sizeof(struct sh7786_pcie_port), 543 GFP_KERNEL); 544 if (unlikely(!sh7786_pcie_ports)) 545 return -ENOMEM; 546 547 /* 548 * Fetch any optional platform clock associated with this block. 549 * 550 * This is a rather nasty hack for boards with spec-mocking FPGAs 551 * that have a secondary set of clocks outside of the on-chip 552 * ones that need to be accounted for before there is any chance 553 * of touching the existing MSTP bits or CPG clocks. 554 */ 555 platclk = clk_get(NULL, "pcie_plat_clk"); 556 if (IS_ERR(platclk)) { 557 /* Sane hardware should probably get a WARN_ON.. */ 558 platclk = NULL; 559 } 560 561 clk_enable(platclk); 562 563 printk(KERN_NOTICE "PCI: probing %d ports.\n", nr_ports); 564 565 for (i = 0; i < nr_ports; i++) { 566 struct sh7786_pcie_port *port = sh7786_pcie_ports + i; 567 568 port->index = i; 569 port->hose = sh7786_pci_channels + i; 570 port->hose->io_map_base = port->hose->resources[0].start; 571 572 async_schedule(sh7786_pcie_hwops->port_init_hw, port); 573 } 574 575 async_synchronize_full(); 576 577 return 0; 578} 579arch_initcall(sh7786_pcie_init); 580