1/* 2 * Linux performance counter support for ARC700 series 3 * 4 * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) 5 * 6 * This code is inspired by the perf support of various other architectures. 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License version 2 as 10 * published by the Free Software Foundation. 11 * 12 */ 13#include <linux/errno.h> 14#include <linux/module.h> 15#include <linux/of.h> 16#include <linux/perf_event.h> 17#include <linux/platform_device.h> 18#include <asm/arcregs.h> 19#include <asm/stacktrace.h> 20 21struct arc_pmu { 22 struct pmu pmu; 23 int counter_size; /* in bits */ 24 int n_counters; 25 unsigned long used_mask[BITS_TO_LONGS(ARC_PMU_MAX_HWEVENTS)]; 26 int ev_hw_idx[PERF_COUNT_ARC_HW_MAX]; 27}; 28 29struct arc_callchain_trace { 30 int depth; 31 void *perf_stuff; 32}; 33 34static int callchain_trace(unsigned int addr, void *data) 35{ 36 struct arc_callchain_trace *ctrl = data; 37 struct perf_callchain_entry *entry = ctrl->perf_stuff; 38 perf_callchain_store(entry, addr); 39 40 if (ctrl->depth++ < 3) 41 return 0; 42 43 return -1; 44} 45 46void 47perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) 48{ 49 struct arc_callchain_trace ctrl = { 50 .depth = 0, 51 .perf_stuff = entry, 52 }; 53 54 arc_unwind_core(NULL, regs, callchain_trace, &ctrl); 55} 56 57void 58perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) 59{ 60 /* 61 * User stack can't be unwound trivially with kernel dwarf unwinder 62 * So for now just record the user PC 63 */ 64 perf_callchain_store(entry, instruction_pointer(regs)); 65} 66 67static struct arc_pmu *arc_pmu; 68 69/* read counter #idx; note that counter# != event# on ARC! */ 70static uint64_t arc_pmu_read_counter(int idx) 71{ 72 uint32_t tmp; 73 uint64_t result; 74 75 /* 76 * ARC supports making 'snapshots' of the counters, so we don't 77 * need to care about counters wrapping to 0 underneath our feet 78 */ 79 write_aux_reg(ARC_REG_PCT_INDEX, idx); 80 tmp = read_aux_reg(ARC_REG_PCT_CONTROL); 81 write_aux_reg(ARC_REG_PCT_CONTROL, tmp | ARC_REG_PCT_CONTROL_SN); 82 result = (uint64_t) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32; 83 result |= read_aux_reg(ARC_REG_PCT_SNAPL); 84 85 return result; 86} 87 88static void arc_perf_event_update(struct perf_event *event, 89 struct hw_perf_event *hwc, int idx) 90{ 91 uint64_t prev_raw_count, new_raw_count; 92 int64_t delta; 93 94 do { 95 prev_raw_count = local64_read(&hwc->prev_count); 96 new_raw_count = arc_pmu_read_counter(idx); 97 } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 98 new_raw_count) != prev_raw_count); 99 100 delta = (new_raw_count - prev_raw_count) & 101 ((1ULL << arc_pmu->counter_size) - 1ULL); 102 103 local64_add(delta, &event->count); 104 local64_sub(delta, &hwc->period_left); 105} 106 107static void arc_pmu_read(struct perf_event *event) 108{ 109 arc_perf_event_update(event, &event->hw, event->hw.idx); 110} 111 112static int arc_pmu_cache_event(u64 config) 113{ 114 unsigned int cache_type, cache_op, cache_result; 115 int ret; 116 117 cache_type = (config >> 0) & 0xff; 118 cache_op = (config >> 8) & 0xff; 119 cache_result = (config >> 16) & 0xff; 120 if (cache_type >= PERF_COUNT_HW_CACHE_MAX) 121 return -EINVAL; 122 if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) 123 return -EINVAL; 124 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 125 return -EINVAL; 126 127 ret = arc_pmu_cache_map[cache_type][cache_op][cache_result]; 128 129 if (ret == CACHE_OP_UNSUPPORTED) 130 return -ENOENT; 131 132 pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n", 133 cache_type, cache_op, cache_result, ret, 134 arc_pmu_ev_hw_map[ret]); 135 136 return ret; 137} 138 139/* initializes hw_perf_event structure if event is supported */ 140static int arc_pmu_event_init(struct perf_event *event) 141{ 142 struct hw_perf_event *hwc = &event->hw; 143 int ret; 144 145 switch (event->attr.type) { 146 case PERF_TYPE_HARDWARE: 147 if (event->attr.config >= PERF_COUNT_HW_MAX) 148 return -ENOENT; 149 if (arc_pmu->ev_hw_idx[event->attr.config] < 0) 150 return -ENOENT; 151 hwc->config = arc_pmu->ev_hw_idx[event->attr.config]; 152 pr_debug("init event %d with h/w %d \'%s\'\n", 153 (int) event->attr.config, (int) hwc->config, 154 arc_pmu_ev_hw_map[event->attr.config]); 155 return 0; 156 case PERF_TYPE_HW_CACHE: 157 ret = arc_pmu_cache_event(event->attr.config); 158 if (ret < 0) 159 return ret; 160 hwc->config = arc_pmu->ev_hw_idx[ret]; 161 return 0; 162 default: 163 return -ENOENT; 164 } 165} 166 167/* starts all counters */ 168static void arc_pmu_enable(struct pmu *pmu) 169{ 170 uint32_t tmp; 171 tmp = read_aux_reg(ARC_REG_PCT_CONTROL); 172 write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x1); 173} 174 175/* stops all counters */ 176static void arc_pmu_disable(struct pmu *pmu) 177{ 178 uint32_t tmp; 179 tmp = read_aux_reg(ARC_REG_PCT_CONTROL); 180 write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x0); 181} 182 183/* 184 * Assigns hardware counter to hardware condition. 185 * Note that there is no separate start/stop mechanism; 186 * stopping is achieved by assigning the 'never' condition 187 */ 188static void arc_pmu_start(struct perf_event *event, int flags) 189{ 190 struct hw_perf_event *hwc = &event->hw; 191 int idx = hwc->idx; 192 193 if (WARN_ON_ONCE(idx == -1)) 194 return; 195 196 if (flags & PERF_EF_RELOAD) 197 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); 198 199 event->hw.state = 0; 200 201 /* enable ARC pmu here */ 202 write_aux_reg(ARC_REG_PCT_INDEX, idx); 203 write_aux_reg(ARC_REG_PCT_CONFIG, hwc->config); 204} 205 206static void arc_pmu_stop(struct perf_event *event, int flags) 207{ 208 struct hw_perf_event *hwc = &event->hw; 209 int idx = hwc->idx; 210 211 if (!(event->hw.state & PERF_HES_STOPPED)) { 212 /* stop ARC pmu here */ 213 write_aux_reg(ARC_REG_PCT_INDEX, idx); 214 215 /* condition code #0 is always "never" */ 216 write_aux_reg(ARC_REG_PCT_CONFIG, 0); 217 218 event->hw.state |= PERF_HES_STOPPED; 219 } 220 221 if ((flags & PERF_EF_UPDATE) && 222 !(event->hw.state & PERF_HES_UPTODATE)) { 223 arc_perf_event_update(event, &event->hw, idx); 224 event->hw.state |= PERF_HES_UPTODATE; 225 } 226} 227 228static void arc_pmu_del(struct perf_event *event, int flags) 229{ 230 arc_pmu_stop(event, PERF_EF_UPDATE); 231 __clear_bit(event->hw.idx, arc_pmu->used_mask); 232 233 perf_event_update_userpage(event); 234} 235 236/* allocate hardware counter and optionally start counting */ 237static int arc_pmu_add(struct perf_event *event, int flags) 238{ 239 struct hw_perf_event *hwc = &event->hw; 240 int idx = hwc->idx; 241 242 if (__test_and_set_bit(idx, arc_pmu->used_mask)) { 243 idx = find_first_zero_bit(arc_pmu->used_mask, 244 arc_pmu->n_counters); 245 if (idx == arc_pmu->n_counters) 246 return -EAGAIN; 247 248 __set_bit(idx, arc_pmu->used_mask); 249 hwc->idx = idx; 250 } 251 252 write_aux_reg(ARC_REG_PCT_INDEX, idx); 253 write_aux_reg(ARC_REG_PCT_CONFIG, 0); 254 write_aux_reg(ARC_REG_PCT_COUNTL, 0); 255 write_aux_reg(ARC_REG_PCT_COUNTH, 0); 256 local64_set(&hwc->prev_count, 0); 257 258 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 259 if (flags & PERF_EF_START) 260 arc_pmu_start(event, PERF_EF_RELOAD); 261 262 perf_event_update_userpage(event); 263 264 return 0; 265} 266 267static int arc_pmu_device_probe(struct platform_device *pdev) 268{ 269 struct arc_reg_pct_build pct_bcr; 270 struct arc_reg_cc_build cc_bcr; 271 int i, j, ret; 272 273 union cc_name { 274 struct { 275 uint32_t word0, word1; 276 char sentinel; 277 } indiv; 278 char str[9]; 279 } cc_name; 280 281 282 READ_BCR(ARC_REG_PCT_BUILD, pct_bcr); 283 if (!pct_bcr.v) { 284 pr_err("This core does not have performance counters!\n"); 285 return -ENODEV; 286 } 287 BUG_ON(pct_bcr.c > ARC_PMU_MAX_HWEVENTS); 288 289 READ_BCR(ARC_REG_CC_BUILD, cc_bcr); 290 BUG_ON(!cc_bcr.v); /* Counters exist but No countable conditions ? */ 291 292 arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL); 293 if (!arc_pmu) 294 return -ENOMEM; 295 296 arc_pmu->n_counters = pct_bcr.c; 297 arc_pmu->counter_size = 32 + (pct_bcr.s << 4); 298 299 pr_info("ARC perf\t: %d counters (%d bits), %d countable conditions\n", 300 arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c); 301 302 cc_name.str[8] = 0; 303 for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++) 304 arc_pmu->ev_hw_idx[i] = -1; 305 306 /* loop thru all available h/w condition indexes */ 307 for (j = 0; j < cc_bcr.c; j++) { 308 write_aux_reg(ARC_REG_CC_INDEX, j); 309 cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); 310 cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); 311 312 /* See if it has been mapped to a perf event_id */ 313 for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) { 314 if (arc_pmu_ev_hw_map[i] && 315 !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) && 316 strlen(arc_pmu_ev_hw_map[i])) { 317 pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n", 318 i, cc_name.str, j); 319 arc_pmu->ev_hw_idx[i] = j; 320 } 321 } 322 } 323 324 arc_pmu->pmu = (struct pmu) { 325 .pmu_enable = arc_pmu_enable, 326 .pmu_disable = arc_pmu_disable, 327 .event_init = arc_pmu_event_init, 328 .add = arc_pmu_add, 329 .del = arc_pmu_del, 330 .start = arc_pmu_start, 331 .stop = arc_pmu_stop, 332 .read = arc_pmu_read, 333 }; 334 335 /* ARC 700 PMU does not support sampling events */ 336 arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; 337 338 ret = perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW); 339 340 return ret; 341} 342 343#ifdef CONFIG_OF 344static const struct of_device_id arc_pmu_match[] = { 345 { .compatible = "snps,arc700-pct" }, 346 {}, 347}; 348MODULE_DEVICE_TABLE(of, arc_pmu_match); 349#endif 350 351static struct platform_driver arc_pmu_driver = { 352 .driver = { 353 .name = "arc700-pct", 354 .of_match_table = of_match_ptr(arc_pmu_match), 355 }, 356 .probe = arc_pmu_device_probe, 357}; 358 359module_platform_driver(arc_pmu_driver); 360 361MODULE_LICENSE("GPL"); 362MODULE_AUTHOR("Mischa Jonker <mjonker@synopsys.com>"); 363MODULE_DESCRIPTION("ARC PMU driver"); 364