Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(93)

Side by Side Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2847663005: [WASM SIMD] Replace primitive shuffles with general Shuffle. (Closed)
Patch Set: Rebase. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/api.cc ('k') | src/compiler/instruction-selector.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/base/adapters.h" 5 #include "src/base/adapters.h"
6 #include "src/base/bits.h" 6 #include "src/base/bits.h"
7 #include "src/compiler/instruction-selector-impl.h" 7 #include "src/compiler/instruction-selector-impl.h"
8 #include "src/compiler/node-matchers.h" 8 #include "src/compiler/node-matchers.h"
9 #include "src/compiler/node-properties.h" 9 #include "src/compiler/node-properties.h"
10 10
(...skipping 2396 matching lines...) Expand 10 before | Expand all | Expand 10 after
2407 V(I32x4UConvertF32x4, kArmI32x4UConvertF32x4) \ 2407 V(I32x4UConvertF32x4, kArmI32x4UConvertF32x4) \
2408 V(I32x4UConvertI16x8Low, kArmI32x4UConvertI16x8Low) \ 2408 V(I32x4UConvertI16x8Low, kArmI32x4UConvertI16x8Low) \
2409 V(I32x4UConvertI16x8High, kArmI32x4UConvertI16x8High) \ 2409 V(I32x4UConvertI16x8High, kArmI32x4UConvertI16x8High) \
2410 V(I16x8SConvertI8x16Low, kArmI16x8SConvertI8x16Low) \ 2410 V(I16x8SConvertI8x16Low, kArmI16x8SConvertI8x16Low) \
2411 V(I16x8SConvertI8x16High, kArmI16x8SConvertI8x16High) \ 2411 V(I16x8SConvertI8x16High, kArmI16x8SConvertI8x16High) \
2412 V(I16x8Neg, kArmI16x8Neg) \ 2412 V(I16x8Neg, kArmI16x8Neg) \
2413 V(I16x8UConvertI8x16Low, kArmI16x8UConvertI8x16Low) \ 2413 V(I16x8UConvertI8x16Low, kArmI16x8UConvertI8x16Low) \
2414 V(I16x8UConvertI8x16High, kArmI16x8UConvertI8x16High) \ 2414 V(I16x8UConvertI8x16High, kArmI16x8UConvertI8x16High) \
2415 V(I8x16Neg, kArmI8x16Neg) \ 2415 V(I8x16Neg, kArmI8x16Neg) \
2416 V(S128Not, kArmS128Not) \ 2416 V(S128Not, kArmS128Not) \
2417 V(S32x2Reverse, kArmS32x2Reverse) \
2418 V(S16x4Reverse, kArmS16x4Reverse) \
2419 V(S16x2Reverse, kArmS16x2Reverse) \
2420 V(S8x8Reverse, kArmS8x8Reverse) \
2421 V(S8x4Reverse, kArmS8x4Reverse) \
2422 V(S8x2Reverse, kArmS8x2Reverse) \
2423 V(S1x4Not, kArmS128Not) \ 2417 V(S1x4Not, kArmS128Not) \
2424 V(S1x4AnyTrue, kArmS1x4AnyTrue) \ 2418 V(S1x4AnyTrue, kArmS1x4AnyTrue) \
2425 V(S1x4AllTrue, kArmS1x4AllTrue) \ 2419 V(S1x4AllTrue, kArmS1x4AllTrue) \
2426 V(S1x8Not, kArmS128Not) \ 2420 V(S1x8Not, kArmS128Not) \
2427 V(S1x8AnyTrue, kArmS1x8AnyTrue) \ 2421 V(S1x8AnyTrue, kArmS1x8AnyTrue) \
2428 V(S1x8AllTrue, kArmS1x8AllTrue) \ 2422 V(S1x8AllTrue, kArmS1x8AllTrue) \
2429 V(S1x16Not, kArmS128Not) \ 2423 V(S1x16Not, kArmS128Not) \
2430 V(S1x16AnyTrue, kArmS1x16AnyTrue) \ 2424 V(S1x16AnyTrue, kArmS1x16AnyTrue) \
2431 V(S1x16AllTrue, kArmS1x16AllTrue) 2425 V(S1x16AllTrue, kArmS1x16AllTrue)
2432 2426
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
2511 V(S1x4And, kArmS128And) \ 2505 V(S1x4And, kArmS128And) \
2512 V(S1x4Or, kArmS128Or) \ 2506 V(S1x4Or, kArmS128Or) \
2513 V(S1x4Xor, kArmS128Xor) \ 2507 V(S1x4Xor, kArmS128Xor) \
2514 V(S1x8And, kArmS128And) \ 2508 V(S1x8And, kArmS128And) \
2515 V(S1x8Or, kArmS128Or) \ 2509 V(S1x8Or, kArmS128Or) \
2516 V(S1x8Xor, kArmS128Xor) \ 2510 V(S1x8Xor, kArmS128Xor) \
2517 V(S1x16And, kArmS128And) \ 2511 V(S1x16And, kArmS128And) \
2518 V(S1x16Or, kArmS128Or) \ 2512 V(S1x16Or, kArmS128Or) \
2519 V(S1x16Xor, kArmS128Xor) 2513 V(S1x16Xor, kArmS128Xor)
2520 2514
2521 #define SIMD_SHUFFLE_OP_LIST(V) \
2522 V(S32x4ZipLeft) \
2523 V(S32x4ZipRight) \
2524 V(S32x4UnzipLeft) \
2525 V(S32x4UnzipRight) \
2526 V(S32x4TransposeLeft) \
2527 V(S32x4TransposeRight) \
2528 V(S16x8ZipLeft) \
2529 V(S16x8ZipRight) \
2530 V(S16x8UnzipLeft) \
2531 V(S16x8UnzipRight) \
2532 V(S16x8TransposeLeft) \
2533 V(S16x8TransposeRight) \
2534 V(S8x16ZipLeft) \
2535 V(S8x16ZipRight) \
2536 V(S8x16UnzipLeft) \
2537 V(S8x16UnzipRight) \
2538 V(S8x16TransposeLeft) \
2539 V(S8x16TransposeRight)
2540
2541 #define SIMD_VISIT_SPLAT(Type) \ 2515 #define SIMD_VISIT_SPLAT(Type) \
2542 void InstructionSelector::Visit##Type##Splat(Node* node) { \ 2516 void InstructionSelector::Visit##Type##Splat(Node* node) { \
2543 VisitRR(this, kArm##Type##Splat, node); \ 2517 VisitRR(this, kArm##Type##Splat, node); \
2544 } 2518 }
2545 SIMD_TYPE_LIST(SIMD_VISIT_SPLAT) 2519 SIMD_TYPE_LIST(SIMD_VISIT_SPLAT)
2546 #undef SIMD_VISIT_SPLAT 2520 #undef SIMD_VISIT_SPLAT
2547 2521
2548 #define SIMD_VISIT_EXTRACT_LANE(Type) \ 2522 #define SIMD_VISIT_EXTRACT_LANE(Type) \
2549 void InstructionSelector::Visit##Type##ExtractLane(Node* node) { \ 2523 void InstructionSelector::Visit##Type##ExtractLane(Node* node) { \
2550 VisitRRI(this, kArm##Type##ExtractLane, node); \ 2524 VisitRRI(this, kArm##Type##ExtractLane, node); \
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
2588 SIMD_BINOP_LIST(SIMD_VISIT_BINOP) 2562 SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
2589 #undef SIMD_VISIT_BINOP 2563 #undef SIMD_VISIT_BINOP
2590 2564
2591 #define SIMD_VISIT_SELECT_OP(format) \ 2565 #define SIMD_VISIT_SELECT_OP(format) \
2592 void InstructionSelector::VisitS##format##Select(Node* node) { \ 2566 void InstructionSelector::VisitS##format##Select(Node* node) { \
2593 VisitRRRR(this, kArmS128Select, node); \ 2567 VisitRRRR(this, kArmS128Select, node); \
2594 } 2568 }
2595 SIMD_FORMAT_LIST(SIMD_VISIT_SELECT_OP) 2569 SIMD_FORMAT_LIST(SIMD_VISIT_SELECT_OP)
2596 #undef SIMD_VISIT_SELECT_OP 2570 #undef SIMD_VISIT_SELECT_OP
2597 2571
2598 #define SIMD_VISIT_SHUFFLE_OP(Name) \ 2572 namespace {
2599 void InstructionSelector::Visit##Name(Node* node) { \ 2573 template <int LANES>
2600 VisitRRRShuffle(this, kArm##Name, node); \ 2574 struct ShuffleEntry {
2575 uint8_t shuffle[LANES];
2576 ArchOpcode opcode;
2577 };
2578
2579 static const ShuffleEntry<4> arch_s32x4_shuffles[] = {
2580 {{0, 4, 1, 5}, kArmS32x4ZipLeft},
2581 {{2, 6, 3, 7}, kArmS32x4ZipRight},
2582 {{0, 2, 4, 6}, kArmS32x4UnzipLeft},
2583 {{1, 3, 5, 7}, kArmS32x4UnzipRight},
2584 {{0, 4, 2, 6}, kArmS32x4TransposeLeft},
2585 {{1, 5, 3, 7}, kArmS32x4TransposeRight},
2586 {{1, 0, 3, 2}, kArmS32x2Reverse},
2587 };
2588
2589 static const ShuffleEntry<8> arch_s16x8_shuffles[] = {
2590 {{0, 8, 1, 9, 2, 10, 3, 11}, kArmS16x8ZipLeft},
2591 {{4, 12, 5, 13, 6, 14, 7, 15}, kArmS16x8ZipRight},
2592 {{0, 2, 4, 6, 8, 10, 12, 14}, kArmS16x8UnzipLeft},
2593 {{1, 3, 5, 7, 9, 11, 13, 15}, kArmS16x8UnzipRight},
2594 {{0, 8, 2, 10, 4, 12, 6, 14}, kArmS16x8TransposeLeft},
2595 {{1, 9, 3, 11, 5, 13, 7, 15}, kArmS16x8TransposeRight},
2596 {{3, 2, 1, 0, 7, 6, 5, 4}, kArmS16x4Reverse},
2597 {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse},
2598 };
2599
2600 static const ShuffleEntry<16> arch_s8x16_shuffles[] = {
2601 {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
2602 kArmS8x16ZipLeft},
2603 {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
2604 kArmS8x16ZipRight},
2605 {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
2606 kArmS8x16UnzipLeft},
2607 {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
2608 kArmS8x16UnzipRight},
2609 {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
2610 kArmS8x16TransposeLeft},
2611 {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
2612 kArmS8x16TransposeRight},
2613 {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kArmS8x8Reverse},
2614 {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse},
2615 {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse},
2616 };
2617
2618 // Use a non-shuffle opcode to signal no match.
2619 static const ArchOpcode kNoShuffle = kArmS128Not;
2620
2621 template <int LANES>
2622 ArchOpcode TryMatchArchShuffle(const uint8_t* shuffle,
2623 const ShuffleEntry<LANES>* table,
2624 size_t num_entries, uint8_t mask) {
2625 for (size_t i = 0; i < num_entries; i++) {
2626 const ShuffleEntry<LANES>& entry = table[i];
2627 int j = 0;
2628 for (; j < LANES; j++) {
2629 if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) {
2630 break;
2631 }
2632 }
2633 if (j == LANES) return entry.opcode;
2601 } 2634 }
2602 SIMD_SHUFFLE_OP_LIST(SIMD_VISIT_SHUFFLE_OP) 2635 return kNoShuffle;
2603 #undef SIMD_VISIT_SHUFFLE_OP 2636 }
2604 2637
2605 void InstructionSelector::VisitS8x16Concat(Node* node) { 2638 // Returns the bias if shuffle is a concatenation, 0 otherwise.
2639 template <int LANES>
2640 uint8_t TryMatchConcat(const uint8_t* shuffle, uint8_t mask) {
2641 uint8_t start = shuffle[0];
2642 int i = 1;
2643 for (; i < LANES - start; i++) {
2644 if ((shuffle[i] & mask) != ((shuffle[i - 1] + 1) & mask)) return 0;
2645 }
2646 uint8_t wrap = LANES;
2647 for (; i < LANES; i++, wrap++) {
2648 if ((shuffle[i] & mask) != (wrap & mask)) return 0;
2649 }
2650 return start;
2651 }
2652
2653 // Canonicalize shuffles to make pattern matching simpler. Returns a mask that
2654 // will ignore the high bit of indices in some cases.
2655 uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node,
2656 int num_lanes) {
2657 const uint8_t* shuffle = OpParameter<uint8_t*>(node);
2658 uint8_t mask = 0xff;
2659 // If shuffle is unary, set 'mask' to ignore the high bit of the indices.
2660 // Replace any unused source with the other.
2661 if (selector->GetVirtualRegister(node->InputAt(0)) ==
2662 selector->GetVirtualRegister(node->InputAt(1))) {
2663 // unary, src0 == src1.
2664 mask = num_lanes - 1;
2665 } else {
2666 bool src0_is_used = false;
2667 bool src1_is_used = false;
2668 for (int i = 0; i < num_lanes; i++) {
2669 if (shuffle[i] < num_lanes) {
2670 src0_is_used = true;
2671 } else {
2672 src1_is_used = true;
2673 }
2674 }
2675 if (src0_is_used && !src1_is_used) {
2676 node->ReplaceInput(1, node->InputAt(0));
2677 mask = num_lanes - 1;
2678 } else if (src1_is_used && !src0_is_used) {
2679 node->ReplaceInput(0, node->InputAt(1));
2680 mask = num_lanes - 1;
2681 }
2682 }
2683 return mask;
2684 }
2685
2686 } // namespace
2687
2688 void InstructionSelector::VisitS32x4Shuffle(Node* node) {
2689 const uint8_t* shuffle = OpParameter<uint8_t*>(node);
2690 uint8_t mask = CanonicalizeShuffle(this, node, 4);
2691 ArchOpcode opcode = TryMatchArchShuffle<4>(
2692 shuffle, arch_s32x4_shuffles, arraysize(arch_s32x4_shuffles), mask);
2693 if (opcode != kNoShuffle) {
2694 VisitRRRShuffle(this, opcode, node);
2695 return;
2696 }
2606 ArmOperandGenerator g(this); 2697 ArmOperandGenerator g(this);
2607 int32_t imm = OpParameter<int32_t>(node); 2698 uint8_t lanes = TryMatchConcat<4>(shuffle, mask);
2608 Emit(kArmS8x16Concat, g.DefineAsRegister(node), 2699 if (lanes != 0) {
2609 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), 2700 Emit(kArmS8x16Concat, g.DefineAsRegister(node),
2610 g.UseImmediate(imm)); 2701 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
2702 g.UseImmediate(lanes * 4));
2703 return;
2704 }
2705 // TODO(bbudge) vtbl to handle all other shuffles.
2706 }
2707
2708 void InstructionSelector::VisitS16x8Shuffle(Node* node) {
2709 const uint8_t* shuffle = OpParameter<uint8_t*>(node);
2710 uint8_t mask = CanonicalizeShuffle(this, node, 8);
2711 ArchOpcode opcode = TryMatchArchShuffle<8>(
2712 shuffle, arch_s16x8_shuffles, arraysize(arch_s16x8_shuffles), mask);
2713 if (opcode != kNoShuffle) {
2714 VisitRRRShuffle(this, opcode, node);
2715 return;
2716 }
2717 ArmOperandGenerator g(this);
2718 uint8_t lanes = TryMatchConcat<8>(shuffle, mask);
2719 if (lanes != 0) {
2720 Emit(kArmS8x16Concat, g.DefineAsRegister(node),
2721 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
2722 g.UseImmediate(lanes * 2));
2723 }
2724 // TODO(bbudge) vtbl to handle all other shuffles.
2725 }
2726
2727 void InstructionSelector::VisitS8x16Shuffle(Node* node) {
2728 const uint8_t* shuffle = OpParameter<uint8_t*>(node);
2729 uint8_t mask = CanonicalizeShuffle(this, node, 16);
2730 ArchOpcode opcode = TryMatchArchShuffle<16>(
2731 shuffle, arch_s8x16_shuffles, arraysize(arch_s8x16_shuffles), mask);
2732 if (opcode != kNoShuffle) {
2733 VisitRRRShuffle(this, opcode, node);
2734 return;
2735 }
2736 ArmOperandGenerator g(this);
2737 uint8_t lanes = TryMatchConcat<16>(shuffle, mask);
2738 if (lanes != 0) {
2739 Emit(kArmS8x16Concat, g.DefineAsRegister(node),
2740 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
2741 g.UseImmediate(lanes));
2742 }
2743 // TODO(bbudge) vtbl to handle all other shuffles.
2611 } 2744 }
2612 2745
2613 void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) { 2746 void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
2614 UNREACHABLE(); 2747 UNREACHABLE();
2615 } 2748 }
2616 2749
2617 void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { 2750 void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
2618 UNREACHABLE(); 2751 UNREACHABLE();
2619 } 2752 }
2620 2753
(...skipping 30 matching lines...) Expand all
2651 Vector<MachineType> req_aligned = Vector<MachineType>::New(2); 2784 Vector<MachineType> req_aligned = Vector<MachineType>::New(2);
2652 req_aligned[0] = MachineType::Float32(); 2785 req_aligned[0] = MachineType::Float32();
2653 req_aligned[1] = MachineType::Float64(); 2786 req_aligned[1] = MachineType::Float64();
2654 return MachineOperatorBuilder::AlignmentRequirements:: 2787 return MachineOperatorBuilder::AlignmentRequirements::
2655 SomeUnalignedAccessUnsupported(req_aligned, req_aligned); 2788 SomeUnalignedAccessUnsupported(req_aligned, req_aligned);
2656 } 2789 }
2657 2790
2658 } // namespace compiler 2791 } // namespace compiler
2659 } // namespace internal 2792 } // namespace internal
2660 } // namespace v8 2793 } // namespace v8
OLDNEW
« no previous file with comments | « src/api.cc ('k') | src/compiler/instruction-selector.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698