root/tools/perf/bench/mem-functions.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. init_cycles
  2. get_cycles
  3. timeval2double
  4. __bench_mem_function
  5. bench_mem_common
  6. do_memcpy_cycles
  7. do_memcpy_gettimeofday
  8. bench_mem_memcpy
  9. do_memset_cycles
  10. do_memset_gettimeofday
  11. bench_mem_memset

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * mem-functions.c
   4  *
   5  * Simple memcpy() and memset() benchmarks
   6  *
   7  * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
   8  */
   9 
  10 #include "debug.h"
  11 #include "../perf-sys.h"
  12 #include <subcmd/parse-options.h>
  13 #include "../util/header.h"
  14 #include "../util/cloexec.h"
  15 #include "../util/string2.h"
  16 #include "bench.h"
  17 #include "mem-memcpy-arch.h"
  18 #include "mem-memset-arch.h"
  19 
  20 #include <stdio.h>
  21 #include <stdlib.h>
  22 #include <string.h>
  23 #include <unistd.h>
  24 #include <sys/time.h>
  25 #include <errno.h>
  26 #include <linux/time64.h>
  27 #include <linux/zalloc.h>
  28 
/* Binary scaling step (1024) used by print_bps() to pick a display unit. */
#define K 1024

/*
 * Benchmark settings and cycle-counter state shared by every function in
 * this file.  The first four are bound to command-line switches through
 * the options[] table; cycles_fd is assigned by init_cycles().
 */
static const char       *size_str       = "1MB";   /* buffer size as typed by the user */
static const char       *function_str   = "all";   /* routine name to run, or "all" */
static int              nr_loops        = 1;       /* timed calls per routine */
static bool             use_cycles;                /* measure with the cycles event instead of gettimeofday() */
static int              cycles_fd;                 /* descriptor read by get_cycles() */
  36 
/*
 * Command-line switches accepted by the memcpy and memset benchmarks.
 * Each entry stores its parsed value in one of the file-scope variables
 * above; the table is handed to parse_options() in bench_mem_common().
 */
static const struct option options[] = {
        /* -s/--size: kept as a string and converted later with perf_atoll(). */
        OPT_STRING('s', "size", &size_str, "1MB",
                    "Specify the size of the memory buffers. "
                    "Available units: B, KB, MB, GB and TB (case insensitive)"),

        /* -f/--function: a routine name from the active function table, or "all". */
        OPT_STRING('f', "function", &function_str, "all",
                    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),

        /* -l/--nr_loops: number of timed calls made per routine. */
        OPT_INTEGER('l', "nr_loops", &nr_loops,
                    "Specify the number of loops to run. (default: 1)"),

        /* -c/--cycles: selects the cycles-event measurement path. */
        OPT_BOOLEAN('c', "cycles", &use_cycles,
                    "Use a cycles event instead of gettimeofday() to measure performance"),

        OPT_END()
};
  53 
/* Signatures of the routines under test; they mirror libc memcpy() and memset(). */
typedef void *(*memcpy_t)(void *, const void *, size_t);
typedef void *(*memset_t)(void *, int, size_t);

/*
 * One benchmarkable routine.
 *
 * Tables of these are terminated by an entry whose name is NULL.
 */
struct function {
        const char *name;       /* compared against the -f/--function argument */
        const char *desc;       /* human-readable description printed with the results */
        union {
                memcpy_t memcpy;        /* set by entries of the memcpy table */
                memset_t memset;        /* set by entries of the memset table */
        } fn;
};
  65 
/*
 * Attributes of the perf event opened by init_cycles(): the hardware
 * CPU-cycles counter.  All other attribute fields are left zero.
 */
static struct perf_event_attr cycle_attr = {
        .type           = PERF_TYPE_HARDWARE,
        .config         = PERF_COUNT_HW_CPU_CYCLES
};
  70 
  71 static int init_cycles(void)
  72 {
  73         cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
  74 
  75         if (cycles_fd < 0 && errno == ENOSYS) {
  76                 pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
  77                 return -1;
  78         }
  79 
  80         return cycles_fd;
  81 }
  82 
  83 static u64 get_cycles(void)
  84 {
  85         int ret;
  86         u64 clk;
  87 
  88         ret = read(cycles_fd, &clk, sizeof(u64));
  89         BUG_ON(ret != sizeof(u64));
  90 
  91         return clk;
  92 }
  93 
  94 static double timeval2double(struct timeval *ts)
  95 {
  96         return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
  97 }
  98 
  99 #define print_bps(x) do {                                               \
 100                 if (x < K)                                              \
 101                         printf(" %14lf bytes/sec\n", x);                \
 102                 else if (x < K * K)                                     \
 103                         printf(" %14lfd KB/sec\n", x / K);              \
 104                 else if (x < K * K * K)                                 \
 105                         printf(" %14lf MB/sec\n", x / K / K);           \
 106                 else                                                    \
 107                         printf(" %14lf GB/sec\n", x / K / K / K);       \
 108         } while (0)
 109 
/*
 * Description of one benchmark family (memcpy or memset), passed from the
 * public entry points to bench_mem_common().
 */
struct bench_mem_info {
        /* Table of routines to run; terminated by an entry with a NULL name. */
        const struct function *functions;
        /* Measurement callback used with --cycles; its result is printed as cycles/byte. */
        u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
        /* Measurement callback used otherwise; its result is printed as bytes/sec. */
        double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
        /* Usage strings handed to parse_options(). */
        const char *const *usage;
        /* When true a source buffer is allocated as well; otherwise src stays NULL. */
        bool alloc_src;
};
 117 
 118 static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
 119 {
 120         const struct function *r = &info->functions[r_idx];
 121         double result_bps = 0.0;
 122         u64 result_cycles = 0;
 123         void *src = NULL, *dst = zalloc(size);
 124 
 125         printf("# function '%s' (%s)\n", r->name, r->desc);
 126 
 127         if (dst == NULL)
 128                 goto out_alloc_failed;
 129 
 130         if (info->alloc_src) {
 131                 src = zalloc(size);
 132                 if (src == NULL)
 133                         goto out_alloc_failed;
 134         }
 135 
 136         if (bench_format == BENCH_FORMAT_DEFAULT)
 137                 printf("# Copying %s bytes ...\n\n", size_str);
 138 
 139         if (use_cycles) {
 140                 result_cycles = info->do_cycles(r, size, src, dst);
 141         } else {
 142                 result_bps = info->do_gettimeofday(r, size, src, dst);
 143         }
 144 
 145         switch (bench_format) {
 146         case BENCH_FORMAT_DEFAULT:
 147                 if (use_cycles) {
 148                         printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
 149                 } else {
 150                         print_bps(result_bps);
 151                 }
 152                 break;
 153 
 154         case BENCH_FORMAT_SIMPLE:
 155                 if (use_cycles) {
 156                         printf("%lf\n", (double)result_cycles/size_total);
 157                 } else {
 158                         printf("%lf\n", result_bps);
 159                 }
 160                 break;
 161 
 162         default:
 163                 BUG_ON(1);
 164                 break;
 165         }
 166 
 167 out_free:
 168         free(src);
 169         free(dst);
 170         return;
 171 out_alloc_failed:
 172         printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
 173         goto out_free;
 174 }
 175 
 176 static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
 177 {
 178         int i;
 179         size_t size;
 180         double size_total;
 181 
 182         argc = parse_options(argc, argv, options, info->usage, 0);
 183 
 184         if (use_cycles) {
 185                 i = init_cycles();
 186                 if (i < 0) {
 187                         fprintf(stderr, "Failed to open cycles counter\n");
 188                         return i;
 189                 }
 190         }
 191 
 192         size = (size_t)perf_atoll((char *)size_str);
 193         size_total = (double)size * nr_loops;
 194 
 195         if ((s64)size <= 0) {
 196                 fprintf(stderr, "Invalid size:%s\n", size_str);
 197                 return 1;
 198         }
 199 
 200         if (!strncmp(function_str, "all", 3)) {
 201                 for (i = 0; info->functions[i].name; i++)
 202                         __bench_mem_function(info, i, size, size_total);
 203                 return 0;
 204         }
 205 
 206         for (i = 0; info->functions[i].name; i++) {
 207                 if (!strcmp(info->functions[i].name, function_str))
 208                         break;
 209         }
 210         if (!info->functions[i].name) {
 211                 if (strcmp(function_str, "help") && strcmp(function_str, "h"))
 212                         printf("Unknown function: %s\n", function_str);
 213                 printf("Available functions:\n");
 214                 for (i = 0; info->functions[i].name; i++) {
 215                         printf("\t%s ... %s\n",
 216                                info->functions[i].name, info->functions[i].desc);
 217                 }
 218                 return 1;
 219         }
 220 
 221         __bench_mem_function(info, i, size, size_total);
 222 
 223         return 0;
 224 }
 225 
 226 static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
 227 {
 228         u64 cycle_start = 0ULL, cycle_end = 0ULL;
 229         memcpy_t fn = r->fn.memcpy;
 230         int i;
 231 
 232         /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
 233         memset(src, 0, size);
 234 
 235         /*
 236          * We prefault the freshly allocated memory range here,
 237          * to not measure page fault overhead:
 238          */
 239         fn(dst, src, size);
 240 
 241         cycle_start = get_cycles();
 242         for (i = 0; i < nr_loops; ++i)
 243                 fn(dst, src, size);
 244         cycle_end = get_cycles();
 245 
 246         return cycle_end - cycle_start;
 247 }
 248 
 249 static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
 250 {
 251         struct timeval tv_start, tv_end, tv_diff;
 252         memcpy_t fn = r->fn.memcpy;
 253         int i;
 254 
 255         /*
 256          * We prefault the freshly allocated memory range here,
 257          * to not measure page fault overhead:
 258          */
 259         fn(dst, src, size);
 260 
 261         BUG_ON(gettimeofday(&tv_start, NULL));
 262         for (i = 0; i < nr_loops; ++i)
 263                 fn(dst, src, size);
 264         BUG_ON(gettimeofday(&tv_end, NULL));
 265 
 266         timersub(&tv_end, &tv_start, &tv_diff);
 267 
 268         return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
 269 }
 270 
/*
 * Routines available to "perf bench mem memcpy".
 *
 * The first entry is always the libc memcpy().  When
 * HAVE_ARCH_X86_64_SUPPORT is defined, the included header contributes
 * further entries through the MEMCPY_FN() macro defined just before it
 * (presumably one MEMCPY_FN() invocation per assembly variant; the header
 * is not visible here).  The table ends with a NULL-name sentinel.
 *
 * NOTE(review): unlike memset_functions this table is neither static nor
 * const; confirm whether anything outside this file refers to it before
 * changing that.
 */
struct function memcpy_functions[] = {
        { .name         = "default",
          .desc         = "Default memcpy() provided by glibc",
          .fn.memcpy    = memcpy },

#ifdef HAVE_ARCH_X86_64_SUPPORT
# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
# include "mem-memcpy-x86-64-asm-def.h"
# undef MEMCPY_FN
#endif

        { .name = NULL, }
};
 284 
/* NULL-terminated usage strings handed to parse_options() for the memcpy benchmark. */
static const char * const bench_mem_memcpy_usage[] = {
        "perf bench mem memcpy <options>",
        NULL
};
 289 
 290 int bench_mem_memcpy(int argc, const char **argv)
 291 {
 292         struct bench_mem_info info = {
 293                 .functions              = memcpy_functions,
 294                 .do_cycles              = do_memcpy_cycles,
 295                 .do_gettimeofday        = do_memcpy_gettimeofday,
 296                 .usage                  = bench_mem_memcpy_usage,
 297                 .alloc_src              = true,
 298         };
 299 
 300         return bench_mem_common(argc, argv, &info);
 301 }
 302 
 303 static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
 304 {
 305         u64 cycle_start = 0ULL, cycle_end = 0ULL;
 306         memset_t fn = r->fn.memset;
 307         int i;
 308 
 309         /*
 310          * We prefault the freshly allocated memory range here,
 311          * to not measure page fault overhead:
 312          */
 313         fn(dst, -1, size);
 314 
 315         cycle_start = get_cycles();
 316         for (i = 0; i < nr_loops; ++i)
 317                 fn(dst, i, size);
 318         cycle_end = get_cycles();
 319 
 320         return cycle_end - cycle_start;
 321 }
 322 
 323 static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
 324 {
 325         struct timeval tv_start, tv_end, tv_diff;
 326         memset_t fn = r->fn.memset;
 327         int i;
 328 
 329         /*
 330          * We prefault the freshly allocated memory range here,
 331          * to not measure page fault overhead:
 332          */
 333         fn(dst, -1, size);
 334 
 335         BUG_ON(gettimeofday(&tv_start, NULL));
 336         for (i = 0; i < nr_loops; ++i)
 337                 fn(dst, i, size);
 338         BUG_ON(gettimeofday(&tv_end, NULL));
 339 
 340         timersub(&tv_end, &tv_start, &tv_diff);
 341 
 342         return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
 343 }
 344 
/* NULL-terminated usage strings handed to parse_options() for the memset benchmark. */
static const char * const bench_mem_memset_usage[] = {
        "perf bench mem memset <options>",
        NULL
};
 349 
/*
 * Routines available to "perf bench mem memset".
 *
 * The first entry is always the libc memset().  When
 * HAVE_ARCH_X86_64_SUPPORT is defined, the included header contributes
 * further entries through the MEMSET_FN() macro defined just before it
 * (presumably one MEMSET_FN() invocation per assembly variant; the header
 * is not visible here).  The table ends with a NULL-name sentinel.
 */
static const struct function memset_functions[] = {
        { .name         = "default",
          .desc         = "Default memset() provided by glibc",
          .fn.memset    = memset },

#ifdef HAVE_ARCH_X86_64_SUPPORT
# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
# include "mem-memset-x86-64-asm-def.h"
# undef MEMSET_FN
#endif

        { .name = NULL, }
};
 363 
 364 int bench_mem_memset(int argc, const char **argv)
 365 {
 366         struct bench_mem_info info = {
 367                 .functions              = memset_functions,
 368                 .do_cycles              = do_memset_cycles,
 369                 .do_gettimeofday        = do_memset_gettimeofday,
 370                 .usage                  = bench_mem_memset_usage,
 371         };
 372 
 373         return bench_mem_common(argc, argv, &info);
 374 }

/* [<][>][^][v][top][bottom][index][help] */