#include <linux/linkage.h>

# enter salsa20_encrypt_bytes
ENTRY(salsa20_encrypt_bytes)
	mov	%rsp,%r11
	and	$31,%r11
	add	$256,%r11
	sub	%r11,%rsp
	# x = arg1
	mov	%rdi,%r8
	# m = arg2
	mov	%rsi,%rsi
	# out = arg3
	mov	%rdx,%rdi
	# bytes = arg4
	mov	%rcx,%rdx
	# unsigned>? bytes - 0
	cmp	$0,%rdx
	# comment:fp stack unchanged by jump
	# goto done if !unsigned>
	jbe	._done
	# comment:fp stack unchanged by fallthrough
# start:
._start:
	# r11_stack = r11
	movq	%r11,0(%rsp)
	# r12_stack = r12
	movq	%r12,8(%rsp)
	# r13_stack = r13
	movq	%r13,16(%rsp)
	# r14_stack = r14
	movq	%r14,24(%rsp)
	# r15_stack = r15
	movq	%r15,32(%rsp)
	# rbx_stack = rbx
	movq	%rbx,40(%rsp)
	# rbp_stack = rbp
	movq	%rbp,48(%rsp)
	# in0 = *(uint64 *) (x + 0)
	movq	0(%r8),%rcx
	# in2 = *(uint64 *) (x + 8)
	movq	8(%r8),%r9
	# in4 = *(uint64 *) (x + 16)
	movq	16(%r8),%rax
	# in6 = *(uint64 *) (x + 24)
	movq	24(%r8),%r10
	# in8 = *(uint64 *) (x + 32)
	movq	32(%r8),%r11
	# in10 = *(uint64 *) (x + 40)
	movq	40(%r8),%r12
	# in12 = *(uint64 *) (x + 48)
	movq	48(%r8),%r13
	# in14 = *(uint64 *) (x + 56)
	movq	56(%r8),%r14
	# j0 = in0
	movq	%rcx,56(%rsp)
	# j2 = in2
	movq	%r9,64(%rsp)
	# j4 = in4
	movq	%rax,72(%rsp)
	# j6 = in6
	movq	%r10,80(%rsp)
	# j8 = in8
	movq	%r11,88(%rsp)
	# j10 = in10
	movq	%r12,96(%rsp)
	# j12 = in12
	movq	%r13,104(%rsp)
	# j14 = in14
	movq	%r14,112(%rsp)
	# x_backup = x
	movq	%r8,120(%rsp)
# bytesatleast1:
._bytesatleast1:
	# unsigned<? bytes - 64
	cmp	$64,%rdx
	# comment:fp stack unchanged by jump
	# goto nocopy if !unsigned<
	jae	._nocopy
	# ctarget = out
	movq	%rdi,128(%rsp)
	# out = &tmp
	leaq	192(%rsp),%rdi
	# i = bytes
	mov	%rdx,%rcx
	# while (i) { *out++ = *m++; --i }
	rep	movsb
	# out = &tmp
	leaq	192(%rsp),%rdi
	# m = &tmp
	leaq	192(%rsp),%rsi
	# comment:fp stack unchanged by fallthrough
# nocopy:
._nocopy:
	# out_backup = out
	movq	%rdi,136(%rsp)
	# m_backup = m
	movq	%rsi,144(%rsp)
	# bytes_backup = bytes
	movq	%rdx,152(%rsp)
	# x1 = j0
	movq	56(%rsp),%rdi
	# x0 = x1
	mov	%rdi,%rdx
	# (uint64) x1 >>= 32
	shr	$32,%rdi
	# x3 = j2
	movq	64(%rsp),%rsi
	# x2 = x3
	mov	%rsi,%rcx
	# (uint64) x3 >>= 32
	shr	$32,%rsi
	# x5 = j4
	movq	72(%rsp),%r8
	# x4 = x5
	mov	%r8,%r9
	# (uint64) x5 >>= 32
	shr	$32,%r8
	# x5_stack = x5
	movq	%r8,160(%rsp)
	# x7 = j6
	movq	80(%rsp),%r8
	# x6 = x7
	mov	%r8,%rax
	# (uint64) x7 >>= 32
	shr	$32,%r8
	# x9 = j8
	movq	88(%rsp),%r10
	# x8 = x9
	mov	%r10,%r11
	# (uint64) x9 >>= 32
	shr	$32,%r10
	# x11 = j10
	movq	96(%rsp),%r12
	# x10 = x11
	mov	%r12,%r13
	# x10_stack = x10
	movq	%r13,168(%rsp)
	# (uint64) x11 >>= 32
	shr	$32,%r12
	# x13 = j12
	movq	104(%rsp),%r13
	# x12 = x13
	mov	%r13,%r14
	# (uint64) x13 >>= 32
	shr	$32,%r13
	# x15 = j14
	movq	112(%rsp),%r15
	# x14 = x15
	mov	%r15,%rbx
	# (uint64) x15 >>= 32
	shr	$32,%r15
	# x15_stack = x15
	movq	%r15,176(%rsp)
	# i = 20
	mov	$20,%r15
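	# The loop below is the Salsa20 core.  Thirteen of the sixteen
	# 32-bit state words live in the low halves of general registers;
	# x5, x10 and x15 rotate through the stack slots at 160(%rsp),
	# 168(%rsp) and 176(%rsp), with %rbp and %r15 as scratch.  The
	# rotates use 32-bit register forms (%ebp, %r15d), so the high
	# halves never influence a result.  Each lea/rol/xor triple is one
	# step such as x4 ^= (x0 + x12) <<< 7; four steps make up a
	# quarter-round on one column or row.  A C sketch of the first
	# quarter-round (rol32() standing in for a 32-bit left rotate):
	#
	#	x4  ^= rol32(x0  + x12,  7);
	#	x8  ^= rol32(x4  + x0,   9);
	#	x12 ^= rol32(x8  + x4,  13);
	#	x0  ^= rol32(x12 + x8,  18);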
# mainloop:
._mainloop:
	# i_backup = i
	movq	%r15,184(%rsp)
	# x5 = x5_stack
	movq	160(%rsp),%r15
	# a = x12 + x0
	lea	(%r14,%rdx),%rbp
	# (uint32) a <<<= 7
	rol	$7,%ebp
	# x4 ^= a
	xor	%rbp,%r9
	# b = x1 + x5
	lea	(%rdi,%r15),%rbp
	# (uint32) b <<<= 7
	rol	$7,%ebp
	# x9 ^= b
	xor	%rbp,%r10
	# a = x0 + x4
	lea	(%rdx,%r9),%rbp
	# (uint32) a <<<= 9
	rol	$9,%ebp
	# x8 ^= a
	xor	%rbp,%r11
	# b = x5 + x9
	lea	(%r15,%r10),%rbp
	# (uint32) b <<<= 9
	rol	$9,%ebp
	# x13 ^= b
	xor	%rbp,%r13
	# a = x4 + x8
	lea	(%r9,%r11),%rbp
	# (uint32) a <<<= 13
	rol	$13,%ebp
	# x12 ^= a
	xor	%rbp,%r14
	# b = x9 + x13
	lea	(%r10,%r13),%rbp
	# (uint32) b <<<= 13
	rol	$13,%ebp
	# x1 ^= b
	xor	%rbp,%rdi
	# a = x8 + x12
	lea	(%r11,%r14),%rbp
	# (uint32) a <<<= 18
	rol	$18,%ebp
	# x0 ^= a
	xor	%rbp,%rdx
	# b = x13 + x1
	lea	(%r13,%rdi),%rbp
	# (uint32) b <<<= 18
	rol	$18,%ebp
	# x5 ^= b
	xor	%rbp,%r15
	# x10 = x10_stack
	movq	168(%rsp),%rbp
	# x5_stack = x5
	movq	%r15,160(%rsp)
	# c = x6 + x10
	lea	(%rax,%rbp),%r15
	# (uint32) c <<<= 7
	rol	$7,%r15d
	# x14 ^= c
	xor	%r15,%rbx
	# c = x10 + x14
	lea	(%rbp,%rbx),%r15
	# (uint32) c <<<= 9
	rol	$9,%r15d
	# x2 ^= c
	xor	%r15,%rcx
	# c = x14 + x2
	lea	(%rbx,%rcx),%r15
	# (uint32) c <<<= 13
	rol	$13,%r15d
	# x6 ^= c
	xor	%r15,%rax
	# c = x2 + x6
	lea	(%rcx,%rax),%r15
	# (uint32) c <<<= 18
	rol	$18,%r15d
	# x10 ^= c
	xor	%r15,%rbp
	# x15 = x15_stack
	movq	176(%rsp),%r15
	# x10_stack = x10
	movq	%rbp,168(%rsp)
	# d = x11 + x15
	lea	(%r12,%r15),%rbp
	# (uint32) d <<<= 7
	rol	$7,%ebp
	# x3 ^= d
	xor	%rbp,%rsi
	# d = x15 + x3
	lea	(%r15,%rsi),%rbp
	# (uint32) d <<<= 9
	rol	$9,%ebp
	# x7 ^= d
	xor	%rbp,%r8
	# d = x3 + x7
	lea	(%rsi,%r8),%rbp
	# (uint32) d <<<= 13
	rol	$13,%ebp
	# x11 ^= d
	xor	%rbp,%r12
	# d = x7 + x11
	lea	(%r8,%r12),%rbp
	# (uint32) d <<<= 18
	rol	$18,%ebp
	# x15 ^= d
	xor	%rbp,%r15
	# x15_stack = x15
	movq	%r15,176(%rsp)
	# x5 = x5_stack
	movq	160(%rsp),%r15
	# a = x3 + x0
	lea	(%rsi,%rdx),%rbp
	# (uint32) a <<<= 7
	rol	$7,%ebp
	# x1 ^= a
	xor	%rbp,%rdi
	# b = x4 + x5
	lea	(%r9,%r15),%rbp
	# (uint32) b <<<= 7
	rol	$7,%ebp
	# x6 ^= b
	xor	%rbp,%rax
	# a = x0 + x1
	lea	(%rdx,%rdi),%rbp
	# (uint32) a <<<= 9
	rol	$9,%ebp
	# x2 ^= a
	xor	%rbp,%rcx
	# b = x5 + x6
	lea	(%r15,%rax),%rbp
	# (uint32) b <<<= 9
	rol	$9,%ebp
	# x7 ^= b
	xor	%rbp,%r8
	# a = x1 + x2
	lea	(%rdi,%rcx),%rbp
	# (uint32) a <<<= 13
	rol	$13,%ebp
	# x3 ^= a
	xor	%rbp,%rsi
	# b = x6 + x7
	lea	(%rax,%r8),%rbp
	# (uint32) b <<<= 13
	rol	$13,%ebp
	# x4 ^= b
	xor	%rbp,%r9
	# a = x2 + x3
	lea	(%rcx,%rsi),%rbp
	# (uint32) a <<<= 18
	rol	$18,%ebp
	# x0 ^= a
	xor	%rbp,%rdx
	# b = x7 + x4
	lea	(%r8,%r9),%rbp
	# (uint32) b <<<= 18
	rol	$18,%ebp
	# x5 ^= b
	xor	%rbp,%r15
	# x10 = x10_stack
	movq	168(%rsp),%rbp
	# x5_stack = x5
	movq	%r15,160(%rsp)
	# c = x9 + x10
	lea	(%r10,%rbp),%r15
	# (uint32) c <<<= 7
	rol	$7,%r15d
	# x11 ^= c
	xor	%r15,%r12
	# c = x10 + x11
	lea	(%rbp,%r12),%r15
	# (uint32) c <<<= 9
	rol	$9,%r15d
	# x8 ^= c
	xor	%r15,%r11
	# c = x11 + x8
	lea	(%r12,%r11),%r15
	# (uint32) c <<<= 13
	rol	$13,%r15d
	# x9 ^= c
	xor	%r15,%r10
	# c = x8 + x9
	lea	(%r11,%r10),%r15
	# (uint32) c <<<= 18
	rol	$18,%r15d
	# x10 ^= c
	xor	%r15,%rbp
	# x15 = x15_stack
	movq	176(%rsp),%r15
	# x10_stack = x10
	movq	%rbp,168(%rsp)
	# d = x14 + x15
	lea	(%rbx,%r15),%rbp
	# (uint32) d <<<= 7
	rol	$7,%ebp
	# x12 ^= d
	xor	%rbp,%r14
	# d = x15 + x12
	lea	(%r15,%r14),%rbp
	# (uint32) d <<<= 9
	rol	$9,%ebp
	# x13 ^= d
	xor	%rbp,%r13
	# d = x12 + x13
	lea	(%r14,%r13),%rbp
	# (uint32) d <<<= 13
	rol	$13,%ebp
	# x14 ^= d
	xor	%rbp,%rbx
	# d = x13 + x14
	lea	(%r13,%rbx),%rbp
	# (uint32) d <<<= 18
	rol	$18,%ebp
	# x15 ^= d
	xor	%rbp,%r15
	# x15_stack = x15
	movq	%r15,176(%rsp)
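	# One column round and one row round (a Salsa20 double-round) are
	# done; the block below repeats both, so each pass through
	# ._mainloop covers four of the twenty rounds.  With i starting at
	# 20 and the "sub $4" at the bottom, the loop runs five times.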
	# x5 = x5_stack
	movq	160(%rsp),%r15
	# a = x12 + x0
	lea	(%r14,%rdx),%rbp
	# (uint32) a <<<= 7
	rol	$7,%ebp
	# x4 ^= a
	xor	%rbp,%r9
	# b = x1 + x5
	lea	(%rdi,%r15),%rbp
	# (uint32) b <<<= 7
	rol	$7,%ebp
	# x9 ^= b
	xor	%rbp,%r10
	# a = x0 + x4
	lea	(%rdx,%r9),%rbp
	# (uint32) a <<<= 9
	rol	$9,%ebp
	# x8 ^= a
	xor	%rbp,%r11
	# b = x5 + x9
	lea	(%r15,%r10),%rbp
	# (uint32) b <<<= 9
	rol	$9,%ebp
	# x13 ^= b
	xor	%rbp,%r13
	# a = x4 + x8
	lea	(%r9,%r11),%rbp
	# (uint32) a <<<= 13
	rol	$13,%ebp
	# x12 ^= a
	xor	%rbp,%r14
	# b = x9 + x13
	lea	(%r10,%r13),%rbp
	# (uint32) b <<<= 13
	rol	$13,%ebp
	# x1 ^= b
	xor	%rbp,%rdi
	# a = x8 + x12
	lea	(%r11,%r14),%rbp
	# (uint32) a <<<= 18
	rol	$18,%ebp
	# x0 ^= a
	xor	%rbp,%rdx
	# b = x13 + x1
	lea	(%r13,%rdi),%rbp
	# (uint32) b <<<= 18
	rol	$18,%ebp
	# x5 ^= b
	xor	%rbp,%r15
	# x10 = x10_stack
	movq	168(%rsp),%rbp
	# x5_stack = x5
	movq	%r15,160(%rsp)
	# c = x6 + x10
	lea	(%rax,%rbp),%r15
	# (uint32) c <<<= 7
	rol	$7,%r15d
	# x14 ^= c
	xor	%r15,%rbx
	# c = x10 + x14
	lea	(%rbp,%rbx),%r15
	# (uint32) c <<<= 9
	rol	$9,%r15d
	# x2 ^= c
	xor	%r15,%rcx
	# c = x14 + x2
	lea	(%rbx,%rcx),%r15
	# (uint32) c <<<= 13
	rol	$13,%r15d
	# x6 ^= c
	xor	%r15,%rax
	# c = x2 + x6
	lea	(%rcx,%rax),%r15
	# (uint32) c <<<= 18
	rol	$18,%r15d
	# x10 ^= c
	xor	%r15,%rbp
	# x15 = x15_stack
	movq	176(%rsp),%r15
	# x10_stack = x10
	movq	%rbp,168(%rsp)
	# d = x11 + x15
	lea	(%r12,%r15),%rbp
	# (uint32) d <<<= 7
	rol	$7,%ebp
	# x3 ^= d
	xor	%rbp,%rsi
	# d = x15 + x3
	lea	(%r15,%rsi),%rbp
	# (uint32) d <<<= 9
	rol	$9,%ebp
	# x7 ^= d
	xor	%rbp,%r8
	# d = x3 + x7
	lea	(%rsi,%r8),%rbp
	# (uint32) d <<<= 13
	rol	$13,%ebp
	# x11 ^= d
	xor	%rbp,%r12
	# d = x7 + x11
	lea	(%r8,%r12),%rbp
	# (uint32) d <<<= 18
	rol	$18,%ebp
	# x15 ^= d
	xor	%rbp,%r15
	# x15_stack = x15
	movq	%r15,176(%rsp)
	# x5 = x5_stack
	movq	160(%rsp),%r15
	# a = x3 + x0
	lea	(%rsi,%rdx),%rbp
	# (uint32) a <<<= 7
	rol	$7,%ebp
	# x1 ^= a
	xor	%rbp,%rdi
	# b = x4 + x5
	lea	(%r9,%r15),%rbp
	# (uint32) b <<<= 7
	rol	$7,%ebp
	# x6 ^= b
	xor	%rbp,%rax
	# a = x0 + x1
	lea	(%rdx,%rdi),%rbp
	# (uint32) a <<<= 9
	rol	$9,%ebp
	# x2 ^= a
	xor	%rbp,%rcx
	# b = x5 + x6
	lea	(%r15,%rax),%rbp
	# (uint32) b <<<= 9
	rol	$9,%ebp
	# x7 ^= b
	xor	%rbp,%r8
	# a = x1 + x2
	lea	(%rdi,%rcx),%rbp
	# (uint32) a <<<= 13
	rol	$13,%ebp
	# x3 ^= a
	xor	%rbp,%rsi
	# b = x6 + x7
	lea	(%rax,%r8),%rbp
	# (uint32) b <<<= 13
	rol	$13,%ebp
	# x4 ^= b
	xor	%rbp,%r9
	# a = x2 + x3
	lea	(%rcx,%rsi),%rbp
	# (uint32) a <<<= 18
	rol	$18,%ebp
	# x0 ^= a
	xor	%rbp,%rdx
	# b = x7 + x4
	lea	(%r8,%r9),%rbp
	# (uint32) b <<<= 18
	rol	$18,%ebp
	# x5 ^= b
	xor	%rbp,%r15
	# x10 = x10_stack
	movq	168(%rsp),%rbp
	# x5_stack = x5
	movq	%r15,160(%rsp)
	# c = x9 + x10
	lea	(%r10,%rbp),%r15
	# (uint32) c <<<= 7
	rol	$7,%r15d
	# x11 ^= c
	xor	%r15,%r12
	# c = x10 + x11
	lea	(%rbp,%r12),%r15
	# (uint32) c <<<= 9
	rol	$9,%r15d
	# x8 ^= c
	xor	%r15,%r11
	# c = x11 + x8
	lea	(%r12,%r11),%r15
	# (uint32) c <<<= 13
	rol	$13,%r15d
	# x9 ^= c
	xor	%r15,%r10
	# c = x8 + x9
	lea	(%r11,%r10),%r15
	# (uint32) c <<<= 18
	rol	$18,%r15d
	# x10 ^= c
	xor	%r15,%rbp
	# x15 = x15_stack
	movq	176(%rsp),%r15
	# x10_stack = x10
	movq	%rbp,168(%rsp)
	# d = x14 + x15
	lea	(%rbx,%r15),%rbp
	# (uint32) d <<<= 7
	rol	$7,%ebp
	# x12 ^= d
	xor	%rbp,%r14
	# d = x15 + x12
	lea	(%r15,%r14),%rbp
	# (uint32) d <<<= 9
	rol	$9,%ebp
	# x13 ^= d
	xor	%rbp,%r13
	# d = x12 + x13
	lea	(%r14,%r13),%rbp
	# (uint32) d <<<= 13
	rol	$13,%ebp
	# x14 ^= d
	xor	%rbp,%rbx
	# d = x13 + x14
	lea	(%r13,%rbx),%rbp
	# (uint32) d <<<= 18
	rol	$18,%ebp
	# x15 ^= d
	xor	%rbp,%r15
	# x15_stack = x15
	movq	%r15,176(%rsp)
	# i = i_backup
	movq	184(%rsp),%r15
	# unsigned>? i -= 4
	sub	$4,%r15
	# comment:fp stack unchanged by jump
	# goto mainloop if unsigned>
	ja	._mainloop
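	# The twenty rounds are done.  Now add the saved input words
	# j0..j15 back into x0..x15 (the Salsa20 feed-forward), then xor
	# the resulting keystream block into the message.  Each register
	# packs two 32-bit words: "addl" adds to the even word and zeroes
	# the upper half, while the shl/addq/shr/shl sequence adds the odd
	# word modulo 2^32 without a carry crossing between the halves.
	# A C sketch for one packed pair x and its saved input j:
	#
	#	uint32 lo = (uint32)x + (uint32)j;
	#	uint32 hi = (uint32)(x >> 32) + (uint32)(j >> 32);
	#	x = ((uint64)hi << 32) | lo;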
	# (uint32) x2 += j2
	addl	64(%rsp),%ecx
	# x3 <<= 32
	shl	$32,%rsi
	# x3 += j2
	addq	64(%rsp),%rsi
	# (uint64) x3 >>= 32
	shr	$32,%rsi
	# x3 <<= 32
	shl	$32,%rsi
	# x2 += x3
	add	%rsi,%rcx
	# (uint32) x6 += j6
	addl	80(%rsp),%eax
	# x7 <<= 32
	shl	$32,%r8
	# x7 += j6
	addq	80(%rsp),%r8
	# (uint64) x7 >>= 32
	shr	$32,%r8
	# x7 <<= 32
	shl	$32,%r8
	# x6 += x7
	add	%r8,%rax
	# (uint32) x8 += j8
	addl	88(%rsp),%r11d
	# x9 <<= 32
	shl	$32,%r10
	# x9 += j8
	addq	88(%rsp),%r10
	# (uint64) x9 >>= 32
	shr	$32,%r10
	# x9 <<= 32
	shl	$32,%r10
	# x8 += x9
	add	%r10,%r11
	# (uint32) x12 += j12
	addl	104(%rsp),%r14d
	# x13 <<= 32
	shl	$32,%r13
	# x13 += j12
	addq	104(%rsp),%r13
	# (uint64) x13 >>= 32
	shr	$32,%r13
	# x13 <<= 32
	shl	$32,%r13
	# x12 += x13
	add	%r13,%r14
	# (uint32) x0 += j0
	addl	56(%rsp),%edx
	# x1 <<= 32
	shl	$32,%rdi
	# x1 += j0
	addq	56(%rsp),%rdi
	# (uint64) x1 >>= 32
	shr	$32,%rdi
	# x1 <<= 32
	shl	$32,%rdi
	# x0 += x1
	add	%rdi,%rdx
	# x5 = x5_stack
	movq	160(%rsp),%rdi
	# (uint32) x4 += j4
	addl	72(%rsp),%r9d
	# x5 <<= 32
	shl	$32,%rdi
	# x5 += j4
	addq	72(%rsp),%rdi
	# (uint64) x5 >>= 32
	shr	$32,%rdi
	# x5 <<= 32
	shl	$32,%rdi
	# x4 += x5
	add	%rdi,%r9
	# x10 = x10_stack
	movq	168(%rsp),%r8
	# (uint32) x10 += j10
	addl	96(%rsp),%r8d
	# x11 <<= 32
	shl	$32,%r12
	# x11 += j10
	addq	96(%rsp),%r12
	# (uint64) x11 >>= 32
	shr	$32,%r12
	# x11 <<= 32
	shl	$32,%r12
	# x10 += x11
	add	%r12,%r8
	# x15 = x15_stack
	movq	176(%rsp),%rdi
	# (uint32) x14 += j14
	addl	112(%rsp),%ebx
	# x15 <<= 32
	shl	$32,%rdi
	# x15 += j14
	addq	112(%rsp),%rdi
	# (uint64) x15 >>= 32
	shr	$32,%rdi
	# x15 <<= 32
	shl	$32,%rdi
	# x14 += x15
	add	%rdi,%rbx
	# out = out_backup
	movq	136(%rsp),%rdi
	# m = m_backup
	movq	144(%rsp),%rsi
	# x0 ^= *(uint64 *) (m + 0)
	xorq	0(%rsi),%rdx
	# *(uint64 *) (out + 0) = x0
	movq	%rdx,0(%rdi)
	# x2 ^= *(uint64 *) (m + 8)
	xorq	8(%rsi),%rcx
	# *(uint64 *) (out + 8) = x2
	movq	%rcx,8(%rdi)
	# x4 ^= *(uint64 *) (m + 16)
	xorq	16(%rsi),%r9
	# *(uint64 *) (out + 16) = x4
	movq	%r9,16(%rdi)
	# x6 ^= *(uint64 *) (m + 24)
	xorq	24(%rsi),%rax
	# *(uint64 *) (out + 24) = x6
	movq	%rax,24(%rdi)
	# x8 ^= *(uint64 *) (m + 32)
	xorq	32(%rsi),%r11
	# *(uint64 *) (out + 32) = x8
	movq	%r11,32(%rdi)
	# x10 ^= *(uint64 *) (m + 40)
	xorq	40(%rsi),%r8
	# *(uint64 *) (out + 40) = x10
	movq	%r8,40(%rdi)
	# x12 ^= *(uint64 *) (m + 48)
	xorq	48(%rsi),%r14
	# *(uint64 *) (out + 48) = x12
	movq	%r14,48(%rdi)
	# x14 ^= *(uint64 *) (m + 56)
	xorq	56(%rsi),%rbx
	# *(uint64 *) (out + 56) = x14
	movq	%rbx,56(%rdi)
	# bytes = bytes_backup
	movq	152(%rsp),%rdx
	# in8 = j8
	movq	88(%rsp),%rcx
	# in8 += 1
	add	$1,%rcx
	# j8 = in8
	movq	%rcx,88(%rsp)
	# unsigned>? unsigned<? bytes - 64
	cmp	$64,%rdx
	# comment:fp stack unchanged by jump
	# goto bytesatleast65 if unsigned>
	ja	._bytesatleast65
	# comment:fp stack unchanged by jump
	# goto bytesatleast64 if !unsigned<
	jae	._bytesatleast64
	# m = out
	mov	%rdi,%rsi
	# out = ctarget
	movq	128(%rsp),%rdi
	# i = bytes
	mov	%rdx,%rcx
	# while (i) { *out++ = *m++; --i }
	rep	movsb
	# comment:fp stack unchanged by fallthrough
# bytesatleast64:
._bytesatleast64:
	# x = x_backup
	movq	120(%rsp),%rdi
	# in8 = j8
	movq	88(%rsp),%rsi
	# *(uint64 *) (x + 32) = in8
	movq	%rsi,32(%rdi)
	# r11 = r11_stack
	movq	0(%rsp),%r11
	# r12 = r12_stack
	movq	8(%rsp),%r12
	# r13 = r13_stack
	movq	16(%rsp),%r13
	# r14 = r14_stack
	movq	24(%rsp),%r14
	# r15 = r15_stack
	movq	32(%rsp),%r15
	# rbx = rbx_stack
	movq	40(%rsp),%rbx
	# rbp = rbp_stack
	movq	48(%rsp),%rbp
	# comment:fp stack unchanged by fallthrough
# done:
._done:
	# leave
	add	%r11,%rsp
	mov	%rdi,%rax
	mov	%rsi,%rdx
	ret
# bytesatleast65:
._bytesatleast65:
	# bytes -= 64
	sub	$64,%rdx
	# out += 64
	add	$64,%rdi
	# m += 64
	add	$64,%rsi
	# comment:fp stack unchanged by jump
	# goto bytesatleast1
	jmp	._bytesatleast1
ENDPROC(salsa20_encrypt_bytes)

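	# salsa20_keysetup(x, k, kbits) writes the key into the matrix at
	# x and selects the diagonal constants by key size: sigma =
	# "expand 32-byte k" for 256-bit keys, tau = "expand 16-byte k"
	# for 128-bit keys (e.g. 1634760805 = 0x61707865 = "expa" in
	# little-endian).  x is laid out as 16 little-endian uint32 words,
	# so the key occupies words 1..4 and 11..14 and the constants
	# words 0, 5, 10 and 15.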
# enter salsa20_keysetup
ENTRY(salsa20_keysetup)
	mov	%rsp,%r11
	and	$31,%r11
	add	$256,%r11
	sub	%r11,%rsp
	# k = arg2
	mov	%rsi,%rsi
	# kbits = arg3
	mov	%rdx,%rdx
	# x = arg1
	mov	%rdi,%rdi
	# in0 = *(uint64 *) (k + 0)
	movq	0(%rsi),%r8
	# in2 = *(uint64 *) (k + 8)
	movq	8(%rsi),%r9
	# *(uint64 *) (x + 4) = in0
	movq	%r8,4(%rdi)
	# *(uint64 *) (x + 12) = in2
	movq	%r9,12(%rdi)
	# unsigned<? kbits - 256
	cmp	$256,%rdx
	# comment:fp stack unchanged by jump
	# goto kbits128 if unsigned<
	jb	._kbits128
# kbits256:
._kbits256:
	# in10 = *(uint64 *) (k + 16)
	movq	16(%rsi),%rdx
	# in12 = *(uint64 *) (k + 24)
	movq	24(%rsi),%rsi
	# *(uint64 *) (x + 44) = in10
	movq	%rdx,44(%rdi)
	# *(uint64 *) (x + 52) = in12
	movq	%rsi,52(%rdi)
	# in0 = 1634760805
	mov	$1634760805,%rsi
	# in4 = 857760878
	mov	$857760878,%rdx
	# in10 = 2036477234
	mov	$2036477234,%rcx
	# in14 = 1797285236
	mov	$1797285236,%r8
	# *(uint32 *) (x + 0) = in0
	movl	%esi,0(%rdi)
	# *(uint32 *) (x + 20) = in4
	movl	%edx,20(%rdi)
	# *(uint32 *) (x + 40) = in10
	movl	%ecx,40(%rdi)
	# *(uint32 *) (x + 60) = in14
	movl	%r8d,60(%rdi)
	# comment:fp stack unchanged by jump
	# goto keysetupdone
	jmp	._keysetupdone
# kbits128:
._kbits128:
	# in10 = *(uint64 *) (k + 0)
	movq	0(%rsi),%rdx
	# in12 = *(uint64 *) (k + 8)
	movq	8(%rsi),%rsi
	# *(uint64 *) (x + 44) = in10
	movq	%rdx,44(%rdi)
	# *(uint64 *) (x + 52) = in12
	movq	%rsi,52(%rdi)
	# in0 = 1634760805
	mov	$1634760805,%rsi
	# in4 = 824206446
	mov	$824206446,%rdx
	# in10 = 2036477238
	mov	$2036477238,%rcx
	# in14 = 1797285236
	mov	$1797285236,%r8
	# *(uint32 *) (x + 0) = in0
	movl	%esi,0(%rdi)
	# *(uint32 *) (x + 20) = in4
	movl	%edx,20(%rdi)
	# *(uint32 *) (x + 40) = in10
	movl	%ecx,40(%rdi)
	# *(uint32 *) (x + 60) = in14
	movl	%r8d,60(%rdi)
# keysetupdone:
._keysetupdone:
	# leave
	add	%r11,%rsp
	mov	%rdi,%rax
	mov	%rsi,%rdx
	ret
ENDPROC(salsa20_keysetup)

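	# salsa20_ivsetup(x, iv) stores the 8-byte nonce into words 6..7
	# (x + 24) and zeroes the 64-bit block counter in words 8..9
	# (x + 32); salsa20_encrypt_bytes increments that counter once per
	# 64-byte block and writes it back to the state before returning.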
# enter salsa20_ivsetup
ENTRY(salsa20_ivsetup)
	mov	%rsp,%r11
	and	$31,%r11
	add	$256,%r11
	sub	%r11,%rsp
	# iv = arg2
	mov	%rsi,%rsi
	# x = arg1
	mov	%rdi,%rdi
	# in6 = *(uint64 *) (iv + 0)
	movq	0(%rsi),%rsi
	# in8 = 0
	mov	$0,%r8
	# *(uint64 *) (x + 24) = in6
	movq	%rsi,24(%rdi)
	# *(uint64 *) (x + 32) = in8
	movq	%r8,32(%rdi)
	# leave
	add	%r11,%rsp
	mov	%rdi,%rax
	mov	%rsi,%rdx
	ret
ENDPROC(salsa20_ivsetup)