root/tools/perf/util/s390-cpumsf.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. s390_cpumcf_dumpctr
  2. s390_cpumsf_basic_show
  3. s390_cpumsf_diag_show
  4. trailer_timestamp
  5. s390_cpumsf_trailer_show
  6. s390_cpumsf_validate
  7. s390_cpumsf_reached_trailer
  8. s390_cpumsf_dump
  9. s390_cpumsf_dump_event
  10. s390_cpumsf_make_event
  11. get_trailer_time
  12. s390_cpumsf_samples
  13. s390_cpumsf_run_decoder
  14. s390_cpumsf_alloc_queue
  15. s390_cpumsf_setup_queue
  16. s390_cpumsf_setup_queues
  17. s390_cpumsf_update_queues
  18. s390_cpumsf_process_queues
  19. s390_cpumsf_synth_error
  20. s390_cpumsf_lost
  21. s390_cpumsf_process_event
  22. s390_cpumsf_process_auxtrace_event
  23. s390_cpumsf_free_events
  24. s390_cpumsf_flush
  25. s390_cpumsf_free_queues
  26. s390_cpumsf_free
  27. s390_cpumsf_get_type
  28. check_auxtrace_itrace
  29. s390_cpumsf__config
  30. s390_cpumsf_process_auxtrace_info

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright IBM Corp. 2018
   4  * Auxtrace support for s390 CPU-Measurement Sampling Facility
   5  *
   6  * Author(s):  Thomas Richter <tmricht@linux.ibm.com>
   7  *
   8  * Auxiliary traces are collected during 'perf record' using rbd000 event.
   9  * Several PERF_RECORD_XXX are generated during recording:
  10  *
  11  * PERF_RECORD_AUX:
  12  *      Records that new data landed in the AUX buffer part.
  13  * PERF_RECORD_AUXTRACE:
  14  *      Defines auxtrace data. Followed by the actual data. The contents of
  15  *      the auxtrace data is dependent on the event and the CPU.
  16  *      This record is generated by perf record command. For details
  17  *      see Documentation/perf.data-file-format.txt.
  18  * PERF_RECORD_AUXTRACE_INFO:
  19  *      Defines a table of contains for PERF_RECORD_AUXTRACE records. This
  20  *      record is generated during 'perf record' command. Each record contains
  21  *      up to 256 entries describing offset and size of the AUXTRACE data in the
  22  *      perf.data file.
  23  * PERF_RECORD_AUXTRACE_ERROR:
  24  *      Indicates an error during AUXTRACE collection such as buffer overflow.
  25  * PERF_RECORD_FINISHED_ROUND:
  26  *      Perf events are not necessarily in time stamp order, as they can be
  27  *      collected in parallel on different CPUs. If the events should be
  28  *      processed in time order they need to be sorted first.
  29  *      Perf report guarantees that there is no reordering over a
  30  *      PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
  31  *      time stamp lower than this record are processed (and displayed) before
  32  *      the succeeding perf record are processed.
  33  *
  34  * These records are evaluated during perf report command.
  35  *
  36  * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
  37  * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
  38  * below.
  39  * Auxiliary trace data is collected per CPU. To merge the data into the report
  40  * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
  41  * data is in ascending order.
  42  *
  43  * Each queue has a double linked list of auxtrace_buffers. This list contains
  44  * the offset and size of a CPU's auxtrace data. During auxtrace processing
  45  * the data portion is mmap()'ed.
  46  *
  47  * To sort the queues in chronological order, all queue access is controlled
  48  * by the auxtrace_heap. This is basicly a stack, each stack element has two
  49  * entries, the queue number and a time stamp. However the stack is sorted by
  50  * the time stamps. The highest time stamp is at the bottom the lowest
  51  * (nearest) time stamp is at the top. That sort order is maintained at all
  52  * times!
  53  *
  54  * After the auxtrace infrastructure has been setup, the auxtrace queues are
  55  * filled with data (offset/size pairs) and the auxtrace_heap is populated.
  56  *
  57  * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
  58  * Each record is handled by s390_cpumsf_process_event(). The time stamp of
  59  * the perf record is compared with the time stamp located on the auxtrace_heap
  60  * top element. If that time stamp is lower than the time stamp from the
  61  * record sample, the auxtrace queues will be processed. As auxtrace queues
  62  * control many auxtrace_buffers and each buffer can be quite large, the
  63  * auxtrace buffer might be processed only partially. In this case the
  64  * position in the auxtrace_buffer of that queue is remembered and the time
  65  * stamp of the last processed entry of the auxtrace_buffer replaces the
  66  * current auxtrace_heap top.
  67  *
  68  * 3. Auxtrace_queues might run of out data and are feeded by the
  69  * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
  70  *
  71  * Event Generation
  72  * Each sampling-data entry in the auxilary trace data generates a perf sample.
  73  * This sample is filled
  74  * with data from the auxtrace such as PID/TID, instruction address, CPU state,
  75  * etc. This sample is processed with perf_session__deliver_synth_event() to
  76  * be included into the GUI.
  77  *
  78  * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
  79  * auxiliary traces entries until the time stamp of this record is reached
  80  * auxtrace_heap top. This is triggered by ordered_event->deliver().
  81  *
  82  *
  83  * Perf event processing.
  84  * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
  85  * This is the function call sequence:
  86  *
  87  * __cmd_report()
  88  * |
  89  * perf_session__process_events()
  90  * |
  91  * __perf_session__process_events()
  92  * |
  93  * perf_session__process_event()
  94  * |  This functions splits the PERF_RECORD_XXX records.
  95  * |  - Those generated by perf record command (type number equal or higher
  96  * |    than PERF_RECORD_USER_TYPE_START) are handled by
  97  * |    perf_session__process_user_event(see below)
  98  * |  - Those generated by the kernel are handled by
  99  * |    perf_evlist__parse_sample_timestamp()
 100  * |
 101  * perf_evlist__parse_sample_timestamp()
 102  * |  Extract time stamp from sample data.
 103  * |
 104  * perf_session__queue_event()
 105  * |  If timestamp is positive the sample is entered into an ordered_event
 106  * |  list, sort order is the timestamp. The event processing is deferred until
 107  * |  later (see perf_session__process_user_event()).
 108  * |  Other timestamps (0 or -1) are handled immediately by
 109  * |  perf_session__deliver_event(). These are events generated at start up
 110  * |  of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
 111  * |  records. They are needed to create a list of running processes and its
 112  * |  memory mappings and layout. They are needed at the beginning to enable
 113  * |  command perf report to create process trees and memory mappings.
 114  * |
 115  * perf_session__deliver_event()
 116  * |  Delivers a PERF_RECORD_XXX entry for handling.
 117  * |
 118  * auxtrace__process_event()
 119  * |  The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
 120  * |  time stamps from the auxiliary trace buffers. This enables
 121  * |  synchronization between auxiliary trace data and the events on the
 122  * |  perf.data file.
 123  * |
 124  * machine__deliver_event()
 125  * |  Handles the PERF_RECORD_XXX event. This depends on the record type.
 126  *    It might update the process tree, update a process memory map or enter
 127  *    a sample with IP and call back chain data into GUI data pool.
 128  *
 129  *
 130  * Deferred processing determined by perf_session__process_user_event() is
 131  * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
 132  * are generated during command perf record.
 133  * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
 134  * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
 135  * built up while reading the perf.data file.
 136  * Each event is now processed by calling perf_session__deliver_event().
 137  * This enables time synchronization between the data in the perf.data file and
 138  * the data in the auxiliary trace buffers.
 139  */
 140 
 141 #include <endian.h>
 142 #include <errno.h>
 143 #include <byteswap.h>
 144 #include <inttypes.h>
 145 #include <linux/kernel.h>
 146 #include <linux/types.h>
 147 #include <linux/bitops.h>
 148 #include <linux/log2.h>
 149 #include <linux/zalloc.h>
 150 
 151 #include <sys/stat.h>
 152 #include <sys/types.h>
 153 
 154 #include "color.h"
 155 #include "evsel.h"
 156 #include "evlist.h"
 157 #include "machine.h"
 158 #include "session.h"
 159 #include "tool.h"
 160 #include "debug.h"
 161 #include "auxtrace.h"
 162 #include "s390-cpumsf.h"
 163 #include "s390-cpumsf-kernel.h"
 164 #include "s390-cpumcf-kernel.h"
 165 #include "config.h"
 166 
 167 struct s390_cpumsf {
 168         struct auxtrace         auxtrace;
 169         struct auxtrace_queues  queues;
 170         struct auxtrace_heap    heap;
 171         struct perf_session     *session;
 172         struct machine          *machine;
 173         u32                     auxtrace_type;
 174         u32                     pmu_type;
 175         u16                     machine_type;
 176         bool                    data_queued;
 177         bool                    use_logfile;
 178         char                    *logdir;
 179 };
 180 
 181 struct s390_cpumsf_queue {
 182         struct s390_cpumsf      *sf;
 183         unsigned int            queue_nr;
 184         struct auxtrace_buffer  *buffer;
 185         int                     cpu;
 186         FILE                    *logfile;
 187         FILE                    *logfile_ctr;
 188 };
 189 
 190 /* Check if the raw data should be dumped to file. If this is the case and
 191  * the file to dump to has not been opened for writing, do so.
 192  *
 193  * Return 0 on success and greater zero on error so processing continues.
 194  */
 195 static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf,
 196                                struct perf_sample *sample)
 197 {
 198         struct s390_cpumsf_queue *sfq;
 199         struct auxtrace_queue *q;
 200         int rc = 0;
 201 
 202         if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu)
 203                 return rc;
 204 
 205         q = &sf->queues.queue_array[sample->cpu];
 206         sfq = q->priv;
 207         if (!sfq)               /* Queue not yet allocated */
 208                 return rc;
 209 
 210         if (!sfq->logfile_ctr) {
 211                 char *name;
 212 
 213                 rc = (sf->logdir)
 214                         ? asprintf(&name, "%s/aux.ctr.%02x",
 215                                  sf->logdir, sample->cpu)
 216                         : asprintf(&name, "aux.ctr.%02x", sample->cpu);
 217                 if (rc > 0)
 218                         sfq->logfile_ctr = fopen(name, "w");
 219                 if (sfq->logfile_ctr == NULL) {
 220                         pr_err("Failed to open counter set log file %s, "
 221                                "continue...\n", name);
 222                         rc = 1;
 223                 }
 224                 free(name);
 225         }
 226 
 227         if (sfq->logfile_ctr) {
 228                 /* See comment above for -4 */
 229                 size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1,
 230                                   sfq->logfile_ctr);
 231                 if (n != 1) {
 232                         pr_err("Failed to write counter set data\n");
 233                         rc = 1;
 234                 }
 235         }
 236         return rc;
 237 }
 238 
 239 /* Display s390 CPU measurement facility basic-sampling data entry
 240  * Data written on s390 in big endian byte order and contains bit
 241  * fields across byte boundaries.
 242  */
 243 static bool s390_cpumsf_basic_show(const char *color, size_t pos,
 244                                    struct hws_basic_entry *basicp)
 245 {
 246         struct hws_basic_entry *basic = basicp;
 247 #if __BYTE_ORDER == __LITTLE_ENDIAN
 248         struct hws_basic_entry local;
 249         unsigned long long word = be64toh(*(unsigned long long *)basicp);
 250 
 251         memset(&local, 0, sizeof(local));
 252         local.def = be16toh(basicp->def);
 253         local.prim_asn = word & 0xffff;
 254         local.CL = word >> 30 & 0x3;
 255         local.I = word >> 32 & 0x1;
 256         local.AS = word >> 33 & 0x3;
 257         local.P = word >> 35 & 0x1;
 258         local.W = word >> 36 & 0x1;
 259         local.T = word >> 37 & 0x1;
 260         local.U = word >> 40 & 0xf;
 261         local.ia = be64toh(basicp->ia);
 262         local.gpp = be64toh(basicp->gpp);
 263         local.hpp = be64toh(basicp->hpp);
 264         basic = &local;
 265 #endif
 266         if (basic->def != 1) {
 267                 pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
 268                 return false;
 269         }
 270         color_fprintf(stdout, color, "    [%#08zx] Basic   Def:%04x Inst:%#04x"
 271                       " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
 272                       "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
 273                       pos, basic->def, basic->U,
 274                       basic->T ? 'T' : ' ',
 275                       basic->W ? 'W' : ' ',
 276                       basic->P ? 'P' : ' ',
 277                       basic->I ? 'I' : ' ',
 278                       basic->AS, basic->prim_asn, basic->ia, basic->CL,
 279                       basic->hpp, basic->gpp);
 280         return true;
 281 }
 282 
 283 /* Display s390 CPU measurement facility diagnostic-sampling data entry.
 284  * Data written on s390 in big endian byte order and contains bit
 285  * fields across byte boundaries.
 286  */
 287 static bool s390_cpumsf_diag_show(const char *color, size_t pos,
 288                                   struct hws_diag_entry *diagp)
 289 {
 290         struct hws_diag_entry *diag = diagp;
 291 #if __BYTE_ORDER == __LITTLE_ENDIAN
 292         struct hws_diag_entry local;
 293         unsigned long long word = be64toh(*(unsigned long long *)diagp);
 294 
 295         local.def = be16toh(diagp->def);
 296         local.I = word >> 32 & 0x1;
 297         diag = &local;
 298 #endif
 299         if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
 300                 pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
 301                 return false;
 302         }
 303         color_fprintf(stdout, color, "    [%#08zx] Diag    Def:%04x %c\n",
 304                       pos, diag->def, diag->I ? 'I' : ' ');
 305         return true;
 306 }
 307 
 308 /* Return TOD timestamp contained in an trailer entry */
 309 static unsigned long long trailer_timestamp(struct hws_trailer_entry *te,
 310                                             int idx)
 311 {
 312         /* te->t set: TOD in STCKE format, bytes 8-15
 313          * to->t not set: TOD in STCK format, bytes 0-7
 314          */
 315         unsigned long long ts;
 316 
 317         memcpy(&ts, &te->timestamp[idx], sizeof(ts));
 318         return be64toh(ts);
 319 }
 320 
 321 /* Display s390 CPU measurement facility trailer entry */
 322 static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
 323                                      struct hws_trailer_entry *te)
 324 {
 325 #if __BYTE_ORDER == __LITTLE_ENDIAN
 326         struct hws_trailer_entry local;
 327         const unsigned long long flags = be64toh(te->flags);
 328 
 329         memset(&local, 0, sizeof(local));
 330         local.f = flags >> 63 & 0x1;
 331         local.a = flags >> 62 & 0x1;
 332         local.t = flags >> 61 & 0x1;
 333         local.bsdes = be16toh((flags >> 16 & 0xffff));
 334         local.dsdes = be16toh((flags & 0xffff));
 335         memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp));
 336         local.overflow = be64toh(te->overflow);
 337         local.clock_base = be64toh(te->progusage[0]) >> 63 & 1;
 338         local.progusage2 = be64toh(te->progusage2);
 339         te = &local;
 340 #endif
 341         if (te->bsdes != sizeof(struct hws_basic_entry)) {
 342                 pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
 343                 return false;
 344         }
 345         color_fprintf(stdout, color, "    [%#08zx] Trailer %c%c%c bsdes:%d"
 346                       " dsdes:%d Overflow:%lld Time:%#llx\n"
 347                       "\t\tC:%d TOD:%#lx\n",
 348                       pos,
 349                       te->f ? 'F' : ' ',
 350                       te->a ? 'A' : ' ',
 351                       te->t ? 'T' : ' ',
 352                       te->bsdes, te->dsdes, te->overflow,
 353                       trailer_timestamp(te, te->clock_base),
 354                       te->clock_base, te->progusage2);
 355         return true;
 356 }
 357 
 358 /* Test a sample data block. It must be 4KB or a multiple thereof in size and
 359  * 4KB page aligned. Each sample data page has a trailer entry at the
 360  * end which contains the sample entry data sizes.
 361  *
 362  * Return true if the sample data block passes the checks and set the
 363  * basic set entry size and diagnostic set entry size.
 364  *
 365  * Return false on failure.
 366  *
 367  * Note: Old hardware does not set the basic or diagnostic entry sizes
 368  * in the trailer entry. Use the type number instead.
 369  */
 370 static bool s390_cpumsf_validate(int machine_type,
 371                                  unsigned char *buf, size_t len,
 372                                  unsigned short *bsdes,
 373                                  unsigned short *dsdes)
 374 {
 375         struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
 376         struct hws_trailer_entry *te;
 377 
 378         *dsdes = *bsdes = 0;
 379         if (len & (S390_CPUMSF_PAGESZ - 1))     /* Illegal size */
 380                 return false;
 381         if (be16toh(basic->def) != 1)   /* No basic set entry, must be first */
 382                 return false;
 383         /* Check for trailer entry at end of SDB */
 384         te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
 385                                               - sizeof(*te));
 386         *bsdes = be16toh(te->bsdes);
 387         *dsdes = be16toh(te->dsdes);
 388         if (!te->bsdes && !te->dsdes) {
 389                 /* Very old hardware, use CPUID */
 390                 switch (machine_type) {
 391                 case 2097:
 392                 case 2098:
 393                         *dsdes = 64;
 394                         *bsdes = 32;
 395                         break;
 396                 case 2817:
 397                 case 2818:
 398                         *dsdes = 74;
 399                         *bsdes = 32;
 400                         break;
 401                 case 2827:
 402                 case 2828:
 403                         *dsdes = 85;
 404                         *bsdes = 32;
 405                         break;
 406                 case 2964:
 407                 case 2965:
 408                         *dsdes = 112;
 409                         *bsdes = 32;
 410                         break;
 411                 default:
 412                         /* Illegal trailer entry */
 413                         return false;
 414                 }
 415         }
 416         return true;
 417 }
 418 
 419 /* Return true if there is room for another entry */
 420 static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
 421 {
 422         size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
 423 
 424         if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
 425                 return false;
 426         return true;
 427 }
 428 
 429 /* Dump an auxiliary buffer. These buffers are multiple of
 430  * 4KB SDB pages.
 431  */
 432 static void s390_cpumsf_dump(struct s390_cpumsf *sf,
 433                              unsigned char *buf, size_t len)
 434 {
 435         const char *color = PERF_COLOR_BLUE;
 436         struct hws_basic_entry *basic;
 437         struct hws_diag_entry *diag;
 438         unsigned short bsdes, dsdes;
 439         size_t pos = 0;
 440 
 441         color_fprintf(stdout, color,
 442                       ". ... s390 AUX data: size %zu bytes\n",
 443                       len);
 444 
 445         if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
 446                                   &dsdes)) {
 447                 pr_err("Invalid AUX trace data block size:%zu"
 448                        " (type:%d bsdes:%hd dsdes:%hd)\n",
 449                        len, sf->machine_type, bsdes, dsdes);
 450                 return;
 451         }
 452 
 453         /* s390 kernel always returns 4KB blocks fully occupied,
 454          * no partially filled SDBs.
 455          */
 456         while (pos < len) {
 457                 /* Handle Basic entry */
 458                 basic = (struct hws_basic_entry *)(buf + pos);
 459                 if (s390_cpumsf_basic_show(color, pos, basic))
 460                         pos += bsdes;
 461                 else
 462                         return;
 463 
 464                 /* Handle Diagnostic entry */
 465                 diag = (struct hws_diag_entry *)(buf + pos);
 466                 if (s390_cpumsf_diag_show(color, pos, diag))
 467                         pos += dsdes;
 468                 else
 469                         return;
 470 
 471                 /* Check for trailer entry */
 472                 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
 473                         /* Show trailer entry */
 474                         struct hws_trailer_entry te;
 475 
 476                         pos = (pos + S390_CPUMSF_PAGESZ)
 477                                & ~(S390_CPUMSF_PAGESZ - 1);
 478                         pos -= sizeof(te);
 479                         memcpy(&te, buf + pos, sizeof(te));
 480                         /* Set descriptor sizes in case of old hardware
 481                          * where these values are not set.
 482                          */
 483                         te.bsdes = bsdes;
 484                         te.dsdes = dsdes;
 485                         if (s390_cpumsf_trailer_show(color, pos, &te))
 486                                 pos += sizeof(te);
 487                         else
 488                                 return;
 489                 }
 490         }
 491 }
 492 
 493 static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
 494                                    size_t len)
 495 {
 496         printf(".\n");
 497         s390_cpumsf_dump(sf, buf, len);
 498 }
 499 
 500 #define S390_LPP_PID_MASK       0xffffffff
 501 
 502 static bool s390_cpumsf_make_event(size_t pos,
 503                                    struct hws_basic_entry *basic,
 504                                    struct s390_cpumsf_queue *sfq)
 505 {
 506         struct perf_sample sample = {
 507                                 .ip = basic->ia,
 508                                 .pid = basic->hpp & S390_LPP_PID_MASK,
 509                                 .tid = basic->hpp & S390_LPP_PID_MASK,
 510                                 .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
 511                                 .cpu = sfq->cpu,
 512                                 .period = 1
 513                             };
 514         union perf_event event;
 515 
 516         memset(&event, 0, sizeof(event));
 517         if (basic->CL == 1)     /* Native LPAR mode */
 518                 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
 519                                           : PERF_RECORD_MISC_KERNEL;
 520         else if (basic->CL == 2)        /* Guest kernel/user space */
 521                 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
 522                                           : PERF_RECORD_MISC_GUEST_KERNEL;
 523         else if (basic->gpp || basic->prim_asn != 0xffff)
 524                 /* Use heuristics on old hardware */
 525                 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
 526                                           : PERF_RECORD_MISC_GUEST_KERNEL;
 527         else
 528                 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
 529                                           : PERF_RECORD_MISC_KERNEL;
 530 
 531         event.sample.header.type = PERF_RECORD_SAMPLE;
 532         event.sample.header.misc = sample.cpumode;
 533         event.sample.header.size = sizeof(struct perf_event_header);
 534 
 535         pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
 536                  __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
 537                  sample.tid, sample.cpumode, sample.cpu);
 538         if (perf_session__deliver_synth_event(sfq->sf->session, &event,
 539                                               &sample)) {
 540                 pr_err("s390 Auxiliary Trace: failed to deliver event\n");
 541                 return false;
 542         }
 543         return true;
 544 }
 545 
 546 static unsigned long long get_trailer_time(const unsigned char *buf)
 547 {
 548         struct hws_trailer_entry *te;
 549         unsigned long long aux_time, progusage2;
 550         bool clock_base;
 551 
 552         te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
 553                                               - sizeof(*te));
 554 
 555 #if __BYTE_ORDER == __LITTLE_ENDIAN
 556         clock_base = be64toh(te->progusage[0]) >> 63 & 0x1;
 557         progusage2 = be64toh(te->progusage[1]);
 558 #else
 559         clock_base = te->clock_base;
 560         progusage2 = te->progusage2;
 561 #endif
 562         if (!clock_base)        /* TOD_CLOCK_BASE value missing */
 563                 return 0;
 564 
 565         /* Correct calculation to convert time stamp in trailer entry to
 566          * nano seconds (taken from arch/s390 function tod_to_ns()).
 567          * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
 568          */
 569         aux_time = trailer_timestamp(te, clock_base) - progusage2;
 570         aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
 571         return aux_time;
 572 }
 573 
 574 /* Process the data samples of a single queue. The first parameter is a
 575  * pointer to the queue, the second parameter is the time stamp. This
 576  * is the time stamp:
 577  * - of the event that triggered this processing.
 578  * - or the time stamp when the last proccesing of this queue stopped.
 579  *   In this case it stopped at a 4KB page boundary and record the
 580  *   position on where to continue processing on the next invocation
 581  *   (see buffer->use_data and buffer->use_size).
 582  *
 583  * When this function returns the second parameter is updated to
 584  * reflect the time stamp of the last processed auxiliary data entry
 585  * (taken from the trailer entry of that page). The caller uses this
 586  * returned time stamp to record the last processed entry in this
 587  * queue.
 588  *
 589  * The function returns:
 590  * 0:  Processing successful. The second parameter returns the
 591  *     time stamp from the trailer entry until which position
 592  *     processing took place. Subsequent calls resume from this
 593  *     position.
 594  * <0: An error occurred during processing. The second parameter
 595  *     returns the maximum time stamp.
 596  * >0: Done on this queue. The second parameter returns the
 597  *     maximum time stamp.
 598  */
 599 static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
 600 {
 601         struct s390_cpumsf *sf = sfq->sf;
 602         unsigned char *buf = sfq->buffer->use_data;
 603         size_t len = sfq->buffer->use_size;
 604         struct hws_basic_entry *basic;
 605         unsigned short bsdes, dsdes;
 606         size_t pos = 0;
 607         int err = 1;
 608         u64 aux_ts;
 609 
 610         if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
 611                                   &dsdes)) {
 612                 *ts = ~0ULL;
 613                 return -1;
 614         }
 615 
 616         /* Get trailer entry time stamp and check if entries in
 617          * this auxiliary page are ready for processing. If the
 618          * time stamp of the first entry is too high, whole buffer
 619          * can be skipped. In this case return time stamp.
 620          */
 621         aux_ts = get_trailer_time(buf);
 622         if (!aux_ts) {
 623                 pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
 624                        (s64)sfq->buffer->data_offset);
 625                 aux_ts = ~0ULL;
 626                 goto out;
 627         }
 628         if (aux_ts > *ts) {
 629                 *ts = aux_ts;
 630                 return 0;
 631         }
 632 
 633         while (pos < len) {
 634                 /* Handle Basic entry */
 635                 basic = (struct hws_basic_entry *)(buf + pos);
 636                 if (s390_cpumsf_make_event(pos, basic, sfq))
 637                         pos += bsdes;
 638                 else {
 639                         err = -EBADF;
 640                         goto out;
 641                 }
 642 
 643                 pos += dsdes;   /* Skip diagnositic entry */
 644 
 645                 /* Check for trailer entry */
 646                 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
 647                         pos = (pos + S390_CPUMSF_PAGESZ)
 648                                & ~(S390_CPUMSF_PAGESZ - 1);
 649                         /* Check existence of next page */
 650                         if (pos >= len)
 651                                 break;
 652                         aux_ts = get_trailer_time(buf + pos);
 653                         if (!aux_ts) {
 654                                 aux_ts = ~0ULL;
 655                                 goto out;
 656                         }
 657                         if (aux_ts > *ts) {
 658                                 *ts = aux_ts;
 659                                 sfq->buffer->use_data += pos;
 660                                 sfq->buffer->use_size -= pos;
 661                                 return 0;
 662                         }
 663                 }
 664         }
 665 out:
 666         *ts = aux_ts;
 667         sfq->buffer->use_size = 0;
 668         sfq->buffer->use_data = NULL;
 669         return err;     /* Buffer completely scanned or error */
 670 }
 671 
 672 /* Run the s390 auxiliary trace decoder.
 673  * Select the queue buffer to operate on, the caller already selected
 674  * the proper queue, depending on second parameter 'ts'.
 675  * This is the time stamp until which the auxiliary entries should
 676  * be processed. This value is updated by called functions and
 677  * returned to the caller.
 678  *
 679  * Resume processing in the current buffer. If there is no buffer
 680  * get a new buffer from the queue and setup start position for
 681  * processing.
 682  * When a buffer is completely processed remove it from the queue
 683  * before returning.
 684  *
 685  * This function returns
 686  * 1: When the queue is empty. Second parameter will be set to
 687  *    maximum time stamp.
 688  * 0: Normal processing done.
 689  * <0: Error during queue buffer setup. This causes the caller
 690  *     to stop processing completely.
 691  */
 692 static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
 693                                    u64 *ts)
 694 {
 695 
 696         struct auxtrace_buffer *buffer;
 697         struct auxtrace_queue *queue;
 698         int err;
 699 
 700         queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
 701 
 702         /* Get buffer and last position in buffer to resume
 703          * decoding the auxiliary entries. One buffer might be large
 704          * and decoding might stop in between. This depends on the time
 705          * stamp of the trailer entry in each page of the auxiliary
 706          * data and the time stamp of the event triggering the decoding.
 707          */
 708         if (sfq->buffer == NULL) {
 709                 sfq->buffer = buffer = auxtrace_buffer__next(queue,
 710                                                              sfq->buffer);
 711                 if (!buffer) {
 712                         *ts = ~0ULL;
 713                         return 1;       /* Processing done on this queue */
 714                 }
 715                 /* Start with a new buffer on this queue */
 716                 if (buffer->data) {
 717                         buffer->use_size = buffer->size;
 718                         buffer->use_data = buffer->data;
 719                 }
 720                 if (sfq->logfile) {     /* Write into log file */
 721                         size_t rc = fwrite(buffer->data, buffer->size, 1,
 722                                            sfq->logfile);
 723                         if (rc != 1)
 724                                 pr_err("Failed to write auxiliary data\n");
 725                 }
 726         } else
 727                 buffer = sfq->buffer;
 728 
 729         if (!buffer->data) {
 730                 int fd = perf_data__fd(sfq->sf->session->data);
 731 
 732                 buffer->data = auxtrace_buffer__get_data(buffer, fd);
 733                 if (!buffer->data)
 734                         return -ENOMEM;
 735                 buffer->use_size = buffer->size;
 736                 buffer->use_data = buffer->data;
 737 
 738                 if (sfq->logfile) {     /* Write into log file */
 739                         size_t rc = fwrite(buffer->data, buffer->size, 1,
 740                                            sfq->logfile);
 741                         if (rc != 1)
 742                                 pr_err("Failed to write auxiliary data\n");
 743                 }
 744         }
 745         pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
 746                   __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
 747                   buffer->size, buffer->use_size);
 748         err = s390_cpumsf_samples(sfq, ts);
 749 
 750         /* If non-zero, there is either an error (err < 0) or the buffer is
 751          * completely done (err > 0). The error is unrecoverable, usually
 752          * some descriptors could not be read successfully, so continue with
 753          * the next buffer.
 754          * In both cases the parameter 'ts' has been updated.
 755          */
 756         if (err) {
 757                 sfq->buffer = NULL;
 758                 list_del_init(&buffer->list);
 759                 auxtrace_buffer__free(buffer);
 760                 if (err > 0)            /* Buffer done, no error */
 761                         err = 0;
 762         }
 763         return err;
 764 }
 765 
 766 static struct s390_cpumsf_queue *
 767 s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
 768 {
 769         struct s390_cpumsf_queue *sfq;
 770 
 771         sfq = zalloc(sizeof(struct s390_cpumsf_queue));
 772         if (sfq == NULL)
 773                 return NULL;
 774 
 775         sfq->sf = sf;
 776         sfq->queue_nr = queue_nr;
 777         sfq->cpu = -1;
 778         if (sf->use_logfile) {
 779                 char *name;
 780                 int rc;
 781 
 782                 rc = (sf->logdir)
 783                         ? asprintf(&name, "%s/aux.smp.%02x",
 784                                  sf->logdir, queue_nr)
 785                         : asprintf(&name, "aux.smp.%02x", queue_nr);
 786                 if (rc > 0)
 787                         sfq->logfile = fopen(name, "w");
 788                 if (sfq->logfile == NULL) {
 789                         pr_err("Failed to open auxiliary log file %s,"
 790                                "continue...\n", name);
 791                         sf->use_logfile = false;
 792                 }
 793                 free(name);
 794         }
 795         return sfq;
 796 }
 797 
 798 static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
 799                                    struct auxtrace_queue *queue,
 800                                    unsigned int queue_nr, u64 ts)
 801 {
 802         struct s390_cpumsf_queue *sfq = queue->priv;
 803 
 804         if (list_empty(&queue->head))
 805                 return 0;
 806 
 807         if (sfq == NULL) {
 808                 sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
 809                 if (!sfq)
 810                         return -ENOMEM;
 811                 queue->priv = sfq;
 812 
 813                 if (queue->cpu != -1)
 814                         sfq->cpu = queue->cpu;
 815         }
 816         return auxtrace_heap__add(&sf->heap, queue_nr, ts);
 817 }
 818 
 819 static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
 820 {
 821         unsigned int i;
 822         int ret = 0;
 823 
 824         for (i = 0; i < sf->queues.nr_queues; i++) {
 825                 ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
 826                                               i, ts);
 827                 if (ret)
 828                         break;
 829         }
 830         return ret;
 831 }
 832 
 833 static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
 834 {
 835         if (!sf->queues.new_data)
 836                 return 0;
 837 
 838         sf->queues.new_data = false;
 839         return s390_cpumsf_setup_queues(sf, ts);
 840 }
 841 
 842 static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
 843 {
 844         unsigned int queue_nr;
 845         u64 ts;
 846         int ret;
 847 
 848         while (1) {
 849                 struct auxtrace_queue *queue;
 850                 struct s390_cpumsf_queue *sfq;
 851 
 852                 if (!sf->heap.heap_cnt)
 853                         return 0;
 854 
 855                 if (sf->heap.heap_array[0].ordinal >= timestamp)
 856                         return 0;
 857 
 858                 queue_nr = sf->heap.heap_array[0].queue_nr;
 859                 queue = &sf->queues.queue_array[queue_nr];
 860                 sfq = queue->priv;
 861 
 862                 auxtrace_heap__pop(&sf->heap);
 863                 if (sf->heap.heap_cnt) {
 864                         ts = sf->heap.heap_array[0].ordinal + 1;
 865                         if (ts > timestamp)
 866                                 ts = timestamp;
 867                 } else {
 868                         ts = timestamp;
 869                 }
 870 
 871                 ret = s390_cpumsf_run_decoder(sfq, &ts);
 872                 if (ret < 0) {
 873                         auxtrace_heap__add(&sf->heap, queue_nr, ts);
 874                         return ret;
 875                 }
 876                 if (!ret) {
 877                         ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
 878                         if (ret < 0)
 879                                 return ret;
 880                 }
 881         }
 882         return 0;
 883 }
 884 
 885 static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
 886                                    pid_t pid, pid_t tid, u64 ip, u64 timestamp)
 887 {
 888         char msg[MAX_AUXTRACE_ERROR_MSG];
 889         union perf_event event;
 890         int err;
 891 
 892         strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
 893         auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
 894                              code, cpu, pid, tid, ip, msg, timestamp);
 895 
 896         err = perf_session__deliver_synth_event(sf->session, &event, NULL);
 897         if (err)
 898                 pr_err("s390 Auxiliary Trace: failed to deliver error event,"
 899                         "error %d\n", err);
 900         return err;
 901 }
 902 
 903 static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
 904 {
 905         return s390_cpumsf_synth_error(sf, 1, sample->cpu,
 906                                        sample->pid, sample->tid, 0,
 907                                        sample->time);
 908 }
 909 
 910 static int
 911 s390_cpumsf_process_event(struct perf_session *session,
 912                           union perf_event *event,
 913                           struct perf_sample *sample,
 914                           struct perf_tool *tool)
 915 {
 916         struct s390_cpumsf *sf = container_of(session->auxtrace,
 917                                               struct s390_cpumsf,
 918                                               auxtrace);
 919         u64 timestamp = sample->time;
 920         struct evsel *ev_bc000;
 921 
 922         int err = 0;
 923 
 924         if (dump_trace)
 925                 return 0;
 926 
 927         if (!tool->ordered_events) {
 928                 pr_err("s390 Auxiliary Trace requires ordered events\n");
 929                 return -EINVAL;
 930         }
 931 
 932         if (event->header.type == PERF_RECORD_SAMPLE &&
 933             sample->raw_size) {
 934                 /* Handle event with raw data */
 935                 ev_bc000 = perf_evlist__event2evsel(session->evlist, event);
 936                 if (ev_bc000 &&
 937                     ev_bc000->core.attr.config == PERF_EVENT_CPUM_CF_DIAG)
 938                         err = s390_cpumcf_dumpctr(sf, sample);
 939                 return err;
 940         }
 941 
 942         if (event->header.type == PERF_RECORD_AUX &&
 943             event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
 944                 return s390_cpumsf_lost(sf, sample);
 945 
 946         if (timestamp) {
 947                 err = s390_cpumsf_update_queues(sf, timestamp);
 948                 if (!err)
 949                         err = s390_cpumsf_process_queues(sf, timestamp);
 950         }
 951         return err;
 952 }
 953 
 954 struct s390_cpumsf_synth {
 955         struct perf_tool cpumsf_tool;
 956         struct perf_session *session;
 957 };
 958 
 959 static int
 960 s390_cpumsf_process_auxtrace_event(struct perf_session *session,
 961                                    union perf_event *event __maybe_unused,
 962                                    struct perf_tool *tool __maybe_unused)
 963 {
 964         struct s390_cpumsf *sf = container_of(session->auxtrace,
 965                                               struct s390_cpumsf,
 966                                               auxtrace);
 967 
 968         int fd = perf_data__fd(session->data);
 969         struct auxtrace_buffer *buffer;
 970         off_t data_offset;
 971         int err;
 972 
 973         if (sf->data_queued)
 974                 return 0;
 975 
 976         if (perf_data__is_pipe(session->data)) {
 977                 data_offset = 0;
 978         } else {
 979                 data_offset = lseek(fd, 0, SEEK_CUR);
 980                 if (data_offset == -1)
 981                         return -errno;
 982         }
 983 
 984         err = auxtrace_queues__add_event(&sf->queues, session, event,
 985                                          data_offset, &buffer);
 986         if (err)
 987                 return err;
 988 
 989         /* Dump here after copying piped trace out of the pipe */
 990         if (dump_trace) {
 991                 if (auxtrace_buffer__get_data(buffer, fd)) {
 992                         s390_cpumsf_dump_event(sf, buffer->data,
 993                                                buffer->size);
 994                         auxtrace_buffer__put_data(buffer);
 995                 }
 996         }
 997         return 0;
 998 }
 999 
1000 static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
1001 {
1002 }
1003 
1004 static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
1005                              struct perf_tool *tool __maybe_unused)
1006 {
1007         return 0;
1008 }
1009 
1010 static void s390_cpumsf_free_queues(struct perf_session *session)
1011 {
1012         struct s390_cpumsf *sf = container_of(session->auxtrace,
1013                                               struct s390_cpumsf,
1014                                               auxtrace);
1015         struct auxtrace_queues *queues = &sf->queues;
1016         unsigned int i;
1017 
1018         for (i = 0; i < queues->nr_queues; i++) {
1019                 struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
1020                                                 queues->queue_array[i].priv;
1021 
1022                 if (sfq != NULL) {
1023                         if (sfq->logfile) {
1024                                 fclose(sfq->logfile);
1025                                 sfq->logfile = NULL;
1026                         }
1027                         if (sfq->logfile_ctr) {
1028                                 fclose(sfq->logfile_ctr);
1029                                 sfq->logfile_ctr = NULL;
1030                         }
1031                 }
1032                 zfree(&queues->queue_array[i].priv);
1033         }
1034         auxtrace_queues__free(queues);
1035 }
1036 
1037 static void s390_cpumsf_free(struct perf_session *session)
1038 {
1039         struct s390_cpumsf *sf = container_of(session->auxtrace,
1040                                               struct s390_cpumsf,
1041                                               auxtrace);
1042 
1043         auxtrace_heap__free(&sf->heap);
1044         s390_cpumsf_free_queues(session);
1045         session->auxtrace = NULL;
1046         zfree(&sf->logdir);
1047         free(sf);
1048 }
1049 
1050 static int s390_cpumsf_get_type(const char *cpuid)
1051 {
1052         int ret, family = 0;
1053 
1054         ret = sscanf(cpuid, "%*[^,],%u", &family);
1055         return (ret == 1) ? family : 0;
1056 }
1057 
1058 /* Check itrace options set on perf report command.
1059  * Return true, if none are set or all options specified can be
1060  * handled on s390 (currently only option 'd' for logging.
1061  * Return false otherwise.
1062  */
1063 static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
1064 {
1065         bool ison = false;
1066 
1067         if (!itops || !itops->set)
1068                 return true;
1069         ison = itops->inject || itops->instructions || itops->branches ||
1070                 itops->transactions || itops->ptwrites ||
1071                 itops->pwr_events || itops->errors ||
1072                 itops->dont_decode || itops->calls || itops->returns ||
1073                 itops->callchain || itops->thread_stack ||
1074                 itops->last_branch;
1075         if (!ison)
1076                 return true;
1077         pr_err("Unsupported --itrace options specified\n");
1078         return false;
1079 }
1080 
1081 /* Check for AUXTRACE dump directory if it is needed.
1082  * On failure print an error message but continue.
1083  * Return 0 on wrong keyword in config file and 1 otherwise.
1084  */
1085 static int s390_cpumsf__config(const char *var, const char *value, void *cb)
1086 {
1087         struct s390_cpumsf *sf = cb;
1088         struct stat stbuf;
1089         int rc;
1090 
1091         if (strcmp(var, "auxtrace.dumpdir"))
1092                 return 0;
1093         sf->logdir = strdup(value);
1094         if (sf->logdir == NULL) {
1095                 pr_err("Failed to find auxtrace log directory %s,"
1096                        " continue with current directory...\n", value);
1097                 return 1;
1098         }
1099         rc = stat(sf->logdir, &stbuf);
1100         if (rc == -1 || !S_ISDIR(stbuf.st_mode)) {
1101                 pr_err("Missing auxtrace log directory %s,"
1102                        " continue with current directory...\n", value);
1103                 zfree(&sf->logdir);
1104         }
1105         return 1;
1106 }
1107 
1108 int s390_cpumsf_process_auxtrace_info(union perf_event *event,
1109                                       struct perf_session *session)
1110 {
1111         struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1112         struct s390_cpumsf *sf;
1113         int err;
1114 
1115         if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info))
1116                 return -EINVAL;
1117 
1118         sf = zalloc(sizeof(struct s390_cpumsf));
1119         if (sf == NULL)
1120                 return -ENOMEM;
1121 
1122         if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
1123                 err = -EINVAL;
1124                 goto err_free;
1125         }
1126         sf->use_logfile = session->itrace_synth_opts->log;
1127         if (sf->use_logfile)
1128                 perf_config(s390_cpumsf__config, sf);
1129 
1130         err = auxtrace_queues__init(&sf->queues);
1131         if (err)
1132                 goto err_free;
1133 
1134         sf->session = session;
1135         sf->machine = &session->machines.host; /* No kvm support */
1136         sf->auxtrace_type = auxtrace_info->type;
1137         sf->pmu_type = PERF_TYPE_RAW;
1138         sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
1139 
1140         sf->auxtrace.process_event = s390_cpumsf_process_event;
1141         sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
1142         sf->auxtrace.flush_events = s390_cpumsf_flush;
1143         sf->auxtrace.free_events = s390_cpumsf_free_events;
1144         sf->auxtrace.free = s390_cpumsf_free;
1145         session->auxtrace = &sf->auxtrace;
1146 
1147         if (dump_trace)
1148                 return 0;
1149 
1150         err = auxtrace_queues__process_index(&sf->queues, session);
1151         if (err)
1152                 goto err_free_queues;
1153 
1154         if (sf->queues.populated)
1155                 sf->data_queued = true;
1156 
1157         return 0;
1158 
1159 err_free_queues:
1160         auxtrace_queues__free(&sf->queues);
1161         session->auxtrace = NULL;
1162 err_free:
1163         zfree(&sf->logdir);
1164         free(sf);
1165         return err;
1166 }

/* [<][>][^][v][top][bottom][index][help] */