Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(539)

Side by Side Diff: runtime/vm/intermediate_language_ia32.cc

Issue 15085006: Inline Uint32x4 operations (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/intermediate_language_arm.cc ('k') | runtime/vm/intermediate_language_mips.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_IA32. 5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_IA32.
6 #if defined(TARGET_ARCH_IA32) 6 #if defined(TARGET_ARCH_IA32)
7 7
8 #include "vm/intermediate_language.h" 8 #include "vm/intermediate_language.h"
9 9
10 #include "lib/error.h" 10 #include "lib/error.h"
(...skipping 2983 matching lines...) Expand 10 before | Expand all | Expand 10 after
2994 2994
2995 2995
2996 void Float32x4ConstructorInstr::EmitNativeCode(FlowGraphCompiler* compiler) { 2996 void Float32x4ConstructorInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
2997 XmmRegister v0 = locs()->in(0).fpu_reg(); 2997 XmmRegister v0 = locs()->in(0).fpu_reg();
2998 XmmRegister v1 = locs()->in(1).fpu_reg(); 2998 XmmRegister v1 = locs()->in(1).fpu_reg();
2999 XmmRegister v2 = locs()->in(2).fpu_reg(); 2999 XmmRegister v2 = locs()->in(2).fpu_reg();
3000 XmmRegister v3 = locs()->in(3).fpu_reg(); 3000 XmmRegister v3 = locs()->in(3).fpu_reg();
3001 ASSERT(v0 == locs()->out().fpu_reg()); 3001 ASSERT(v0 == locs()->out().fpu_reg());
3002 __ subl(ESP, Immediate(16)); 3002 __ subl(ESP, Immediate(16));
3003 __ cvtsd2ss(v0, v0); 3003 __ cvtsd2ss(v0, v0);
3004 __ movss(Address(ESP, -16), v0); 3004 __ movss(Address(ESP, 0), v0);
3005 __ movsd(v0, v1); 3005 __ movsd(v0, v1);
3006 __ cvtsd2ss(v0, v0); 3006 __ cvtsd2ss(v0, v0);
3007 __ movss(Address(ESP, -12), v0); 3007 __ movss(Address(ESP, 4), v0);
3008 __ movsd(v0, v2); 3008 __ movsd(v0, v2);
3009 __ cvtsd2ss(v0, v0); 3009 __ cvtsd2ss(v0, v0);
3010 __ movss(Address(ESP, -8), v0); 3010 __ movss(Address(ESP, 8), v0);
3011 __ movsd(v0, v3); 3011 __ movsd(v0, v3);
3012 __ cvtsd2ss(v0, v0); 3012 __ cvtsd2ss(v0, v0);
3013 __ movss(Address(ESP, -4), v0); 3013 __ movss(Address(ESP, 12), v0);
3014 __ movups(v0, Address(ESP, -16)); 3014 __ movups(v0, Address(ESP, 0));
3015 __ addl(ESP, Immediate(16)); 3015 __ addl(ESP, Immediate(16));
3016 } 3016 }
3017 3017
3018 3018
3019 LocationSummary* Float32x4ZeroInstr::MakeLocationSummary() const { 3019 LocationSummary* Float32x4ZeroInstr::MakeLocationSummary() const {
3020 const intptr_t kNumInputs = 0; 3020 const intptr_t kNumInputs = 0;
3021 const intptr_t kNumTemps = 0; 3021 const intptr_t kNumTemps = 0;
3022 LocationSummary* summary = 3022 LocationSummary* summary =
3023 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3023 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3024 summary->set_out(Location::RequiresFpuRegister()); 3024 summary->set_out(Location::RequiresFpuRegister());
(...skipping 215 matching lines...) Expand 10 before | Expand all | Expand 10 after
3240 const intptr_t kNumInputs = 2; 3240 const intptr_t kNumInputs = 2;
3241 const intptr_t kNumTemps = 0; 3241 const intptr_t kNumTemps = 0;
3242 LocationSummary* summary = 3242 LocationSummary* summary =
3243 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3243 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3244 summary->set_in(0, Location::RequiresFpuRegister()); 3244 summary->set_in(0, Location::RequiresFpuRegister());
3245 summary->set_in(1, Location::RequiresFpuRegister()); 3245 summary->set_in(1, Location::RequiresFpuRegister());
3246 summary->set_out(Location::SameAsFirstInput()); 3246 summary->set_out(Location::SameAsFirstInput());
3247 return summary; 3247 return summary;
3248 } 3248 }
3249 3249
3250
3250 void Float32x4WithInstr::EmitNativeCode(FlowGraphCompiler* compiler) { 3251 void Float32x4WithInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3251 XmmRegister replacement = locs()->in(0).fpu_reg(); 3252 XmmRegister replacement = locs()->in(0).fpu_reg();
3252 XmmRegister value = locs()->in(1).fpu_reg(); 3253 XmmRegister value = locs()->in(1).fpu_reg();
3253 3254
3254 ASSERT(locs()->out().fpu_reg() == replacement); 3255 ASSERT(locs()->out().fpu_reg() == replacement);
3255 3256
3256 switch (op_kind()) { 3257 switch (op_kind()) {
3257 case MethodRecognizer::kFloat32x4WithX: 3258 case MethodRecognizer::kFloat32x4WithX:
3258 __ cvtsd2ss(replacement, replacement); 3259 __ cvtsd2ss(replacement, replacement);
3259 __ subl(ESP, Immediate(16)); 3260 __ subl(ESP, Immediate(16));
3260 // Move value to stack. 3261 // Move value to stack.
3261 __ movups(Address(ESP, -16), value); 3262 __ movups(Address(ESP, 0), value);
3262 // Write over X value. 3263 // Write over X value.
3263 __ movss(Address(ESP, -16), replacement); 3264 __ movss(Address(ESP, 0), replacement);
3264 // Move updated value into output register. 3265 // Move updated value into output register.
3265 __ movups(replacement, Address(ESP, -16)); 3266 __ movups(replacement, Address(ESP, 0));
3266 __ addl(ESP, Immediate(16)); 3267 __ addl(ESP, Immediate(16));
3267 break; 3268 break;
3268 case MethodRecognizer::kFloat32x4WithY: 3269 case MethodRecognizer::kFloat32x4WithY:
3269 __ cvtsd2ss(replacement, replacement); 3270 __ cvtsd2ss(replacement, replacement);
3270 __ subl(ESP, Immediate(16)); 3271 __ subl(ESP, Immediate(16));
3271 // Move value to stack. 3272 // Move value to stack.
3272 __ movups(Address(ESP, -16), value); 3273 __ movups(Address(ESP, 0), value);
3273 // Write over Y value. 3274 // Write over Y value.
3274 __ movss(Address(ESP, -12), replacement); 3275 __ movss(Address(ESP, 4), replacement);
3275 // Move updated value into output register. 3276 // Move updated value into output register.
3276 __ movups(replacement, Address(ESP, -16)); 3277 __ movups(replacement, Address(ESP, 0));
3277 __ addl(ESP, Immediate(16)); 3278 __ addl(ESP, Immediate(16));
3278 break; 3279 break;
3279 case MethodRecognizer::kFloat32x4WithZ: 3280 case MethodRecognizer::kFloat32x4WithZ:
3280 __ cvtsd2ss(replacement, replacement); 3281 __ cvtsd2ss(replacement, replacement);
3281 __ subl(ESP, Immediate(16)); 3282 __ subl(ESP, Immediate(16));
3282 // Move value to stack. 3283 // Move value to stack.
3283 __ movups(Address(ESP, -16), value); 3284 __ movups(Address(ESP, 0), value);
3284 // Write over Z value. 3285 // Write over Z value.
3285 __ movss(Address(ESP, -8), replacement); 3286 __ movss(Address(ESP, 8), replacement);
3286 // Move updated value into output register. 3287 // Move updated value into output register.
3287 __ movups(replacement, Address(ESP, -16)); 3288 __ movups(replacement, Address(ESP, 0));
3288 __ addl(ESP, Immediate(16)); 3289 __ addl(ESP, Immediate(16));
3289 break; 3290 break;
3290 case MethodRecognizer::kFloat32x4WithW: 3291 case MethodRecognizer::kFloat32x4WithW:
3291 __ cvtsd2ss(replacement, replacement); 3292 __ cvtsd2ss(replacement, replacement);
3292 __ subl(ESP, Immediate(16)); 3293 __ subl(ESP, Immediate(16));
3293 // Move value to stack. 3294 // Move value to stack.
3294 __ movups(Address(ESP, -16), value); 3295 __ movups(Address(ESP, 0), value);
3295 // Write over W value. 3296 // Write over W value.
3296 __ movss(Address(ESP, -4), replacement); 3297 __ movss(Address(ESP, 12), replacement);
3297 // Move updated value into output register. 3298 // Move updated value into output register.
3298 __ movups(replacement, Address(ESP, -16)); 3299 __ movups(replacement, Address(ESP, 0));
3299 __ addl(ESP, Immediate(16)); 3300 __ addl(ESP, Immediate(16));
3300 break; 3301 break;
3301 default: UNREACHABLE(); 3302 default: UNREACHABLE();
3302 } 3303 }
3303 } 3304 }
3304 3305
3305 3306
3306 LocationSummary* Float32x4ToUint32x4Instr::MakeLocationSummary() const { 3307 LocationSummary* Float32x4ToUint32x4Instr::MakeLocationSummary() const {
3307 const intptr_t kNumInputs = 1; 3308 const intptr_t kNumInputs = 1;
3308 const intptr_t kNumTemps = 0; 3309 const intptr_t kNumTemps = 0;
3309 LocationSummary* summary = 3310 LocationSummary* summary =
3310 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3311 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3311 summary->set_in(0, Location::RequiresFpuRegister()); 3312 summary->set_in(0, Location::RequiresFpuRegister());
3312 summary->set_out(Location::SameAsFirstInput()); 3313 summary->set_out(Location::SameAsFirstInput());
3313 return summary; 3314 return summary;
3314 } 3315 }
3315 3316
3316 3317
3317 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) { 3318 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) {
3318 // NOP. 3319 // NOP.
3319 } 3320 }
3320 3321
3321 3322
3323 LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const {
3324 const intptr_t kNumInputs = 4;
3325 const intptr_t kNumTemps = 0;
3326 LocationSummary* summary =
3327 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3328 summary->set_in(0, Location::RequiresRegister());
3329 summary->set_in(1, Location::RequiresRegister());
3330 summary->set_in(2, Location::RequiresRegister());
3331 summary->set_in(3, Location::RequiresRegister());
3332 summary->set_out(Location::RequiresFpuRegister());
3333 return summary;
3334 }
3335
3336
3337 void Uint32x4BoolConstructorInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3338 Register v0 = locs()->in(0).reg();
3339 Register v1 = locs()->in(1).reg();
3340 Register v2 = locs()->in(2).reg();
3341 Register v3 = locs()->in(3).reg();
3342 XmmRegister result = locs()->out().fpu_reg();
3343 Label x_false, x_done;
3344 Label y_false, y_done;
3345 Label z_false, z_done;
3346 Label w_false, w_done;
3347 __ subl(ESP, Immediate(16));
3348 __ CompareObject(v0, Bool::True());
3349 __ j(NOT_EQUAL, &x_false);
3350 __ movl(Address(ESP, 0), Immediate(0xFFFFFFFF));
3351 __ jmp(&x_done);
3352 __ Bind(&x_false);
3353 __ movl(Address(ESP, 0), Immediate(0x0));
3354 __ Bind(&x_done);
3355
3356 __ CompareObject(v1, Bool::True());
3357 __ j(NOT_EQUAL, &y_false);
3358 __ movl(Address(ESP, 4), Immediate(0xFFFFFFFF));
3359 __ jmp(&y_done);
3360 __ Bind(&y_false);
3361 __ movl(Address(ESP, 4), Immediate(0x0));
3362 __ Bind(&y_done);
3363
3364 __ CompareObject(v2, Bool::True());
3365 __ j(NOT_EQUAL, &z_false);
3366 __ movl(Address(ESP, 8), Immediate(0xFFFFFFFF));
3367 __ jmp(&z_done);
3368 __ Bind(&z_false);
3369 __ movl(Address(ESP, 8), Immediate(0x0));
3370 __ Bind(&z_done);
3371
3372 __ CompareObject(v3, Bool::True());
3373 __ j(NOT_EQUAL, &w_false);
3374 __ movl(Address(ESP, 12), Immediate(0xFFFFFFFF));
3375 __ jmp(&w_done);
3376 __ Bind(&w_false);
3377 __ movl(Address(ESP, 12), Immediate(0x0));
3378 __ Bind(&w_done);
3379
3380 __ movups(result, Address(ESP, 0));
3381 __ addl(ESP, Immediate(16));
3382 }
3383
3384
3385 LocationSummary* Uint32x4GetFlagInstr::MakeLocationSummary() const {
3386 const intptr_t kNumInputs = 1;
3387 const intptr_t kNumTemps = 0;
3388 LocationSummary* summary =
3389 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3390 summary->set_in(0, Location::RequiresFpuRegister());
3391 summary->set_out(Location::RequiresRegister());
3392 return summary;
3393 }
3394
3395
3396 void Uint32x4GetFlagInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3397 XmmRegister value = locs()->in(0).fpu_reg();
3398 Register result = locs()->out().reg();
3399 Label done;
3400 Label non_zero;
3401 __ subl(ESP, Immediate(16));
3402 // Move value to stack.
3403 __ movups(Address(ESP, 0), value);
3404 switch (op_kind()) {
3405 case MethodRecognizer::kUint32x4GetFlagX:
3406 __ movl(result, Address(ESP, 0));
3407 break;
3408 case MethodRecognizer::kUint32x4GetFlagY:
3409 __ movl(result, Address(ESP, 4));
3410 break;
3411 case MethodRecognizer::kUint32x4GetFlagZ:
3412 __ movl(result, Address(ESP, 8));
3413 break;
3414 case MethodRecognizer::kUint32x4GetFlagW:
3415 __ movl(result, Address(ESP, 12));
3416 break;
3417 default: UNREACHABLE();
3418 }
3419 __ addl(ESP, Immediate(16));
3420 __ testl(result, result);
3421 __ j(NOT_ZERO, &non_zero, Assembler::kNearJump);
3422 __ LoadObject(result, Bool::False());
3423 __ jmp(&done);
3424 __ Bind(&non_zero);
3425 __ LoadObject(result, Bool::True());
3426 __ Bind(&done);
3427 }
3428
3429
3430 LocationSummary* Uint32x4SelectInstr::MakeLocationSummary() const {
3431 const intptr_t kNumInputs = 3;
3432 const intptr_t kNumTemps = 1;
3433 LocationSummary* summary =
3434 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3435 summary->set_in(0, Location::RequiresFpuRegister());
3436 summary->set_in(1, Location::RequiresFpuRegister());
3437 summary->set_in(2, Location::RequiresFpuRegister());
3438 summary->set_temp(0, Location::RequiresFpuRegister());
3439 summary->set_out(Location::SameAsFirstInput());
3440 return summary;
3441 }
3442
3443
3444 void Uint32x4SelectInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3445 XmmRegister mask = locs()->in(0).fpu_reg();
3446 XmmRegister trueValue = locs()->in(1).fpu_reg();
3447 XmmRegister falseValue = locs()->in(2).fpu_reg();
3448 XmmRegister out = locs()->out().fpu_reg();
3449 XmmRegister temp = locs()->temp(0).fpu_reg();
3450 ASSERT(out == mask);
3451 // Copy mask.
3452 __ movaps(temp, mask);
3453 // Invert it.
3454 __ notps(temp);
3455 // mask = mask & trueValue.
3456 __ andps(mask, trueValue);
3457 // temp = temp & falseValue.
3458 __ andps(temp, falseValue);
3459 // out = mask | temp.
3460 __ orps(mask, temp);
3461 }
3462
3463
3464 LocationSummary* Uint32x4SetFlagInstr::MakeLocationSummary() const {
3465 const intptr_t kNumInputs = 2;
3466 const intptr_t kNumTemps = 0;
3467 LocationSummary* summary =
3468 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3469 summary->set_in(0, Location::RequiresFpuRegister());
3470 summary->set_in(1, Location::RequiresRegister());
3471 summary->set_out(Location::SameAsFirstInput());
3472 return summary;
3473 }
3474
3475
3476 void Uint32x4SetFlagInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3477 XmmRegister mask = locs()->in(0).fpu_reg();
3478 Register flag = locs()->in(1).reg();
3479 ASSERT(mask == locs()->out().fpu_reg());
3480 __ subl(ESP, Immediate(16));
3481 // Copy mask to stack.
3482 __ movups(Address(ESP, 0), mask);
3483 Label falsePath, exitPath;
3484 __ CompareObject(flag, Bool::True());
3485 __ j(NOT_EQUAL, &falsePath);
3486 switch (op_kind()) {
3487 case MethodRecognizer::kUint32x4WithFlagX:
3488 __ movl(Address(ESP, 0), Immediate(0xFFFFFFFF));
3489 __ jmp(&exitPath);
3490 __ Bind(&falsePath);
3491 __ movl(Address(ESP, 0), Immediate(0x0));
3492 break;
3493 case MethodRecognizer::kUint32x4WithFlagY:
3494 __ movl(Address(ESP, 4), Immediate(0xFFFFFFFF));
3495 __ jmp(&exitPath);
3496 __ Bind(&falsePath);
3497 __ movl(Address(ESP, 4), Immediate(0x0));
3498 break;
3499 case MethodRecognizer::kUint32x4WithFlagZ:
3500 __ movl(Address(ESP, 8), Immediate(0xFFFFFFFF));
3501 __ jmp(&exitPath);
3502 __ Bind(&falsePath);
3503 __ movl(Address(ESP, 8), Immediate(0x0));
3504 break;
3505 case MethodRecognizer::kUint32x4WithFlagW:
3506 __ movl(Address(ESP, 12), Immediate(0xFFFFFFFF));
3507 __ jmp(&exitPath);
3508 __ Bind(&falsePath);
3509 __ movl(Address(ESP, 12), Immediate(0x0));
3510 break;
3511 default: UNREACHABLE();
3512 }
3513 __ Bind(&exitPath);
3514 // Copy mask back to register.
3515 __ movups(mask, Address(ESP, 0));
3516 __ addl(ESP, Immediate(16));
3517 }
3518
3519
3520 LocationSummary* Uint32x4ToFloat32x4Instr::MakeLocationSummary() const {
3521 const intptr_t kNumInputs = 1;
3522 const intptr_t kNumTemps = 0;
3523 LocationSummary* summary =
3524 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3525 summary->set_in(0, Location::RequiresFpuRegister());
3526 summary->set_out(Location::SameAsFirstInput());
3527 return summary;
3528 }
3529
3530
3531 void Uint32x4ToFloat32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) {
3532 // NOP.
3533 }
3534
3535
3536 LocationSummary* BinaryUint32x4OpInstr::MakeLocationSummary() const {
3537 const intptr_t kNumInputs = 2;
3538 const intptr_t kNumTemps = 0;
3539 LocationSummary* summary =
3540 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3541 summary->set_in(0, Location::RequiresFpuRegister());
3542 summary->set_in(1, Location::RequiresFpuRegister());
3543 summary->set_out(Location::SameAsFirstInput());
3544 return summary;
3545 }
3546
3547
3548 void BinaryUint32x4OpInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3549 XmmRegister left = locs()->in(0).fpu_reg();
3550 XmmRegister right = locs()->in(1).fpu_reg();
3551 ASSERT(left == locs()->out().fpu_reg());
3552 switch (op_kind()) {
3553 case Token::kBIT_AND: {
3554 __ andps(left, right);
3555 break;
3556 }
3557 case Token::kBIT_OR: {
3558 __ orps(left, right);
3559 break;
3560 }
3561 case Token::kBIT_XOR: {
3562 __ xorps(left, right);
3563 break;
3564 }
3565 default: UNREACHABLE();
3566 }
3567 }
3568
3569
3322 LocationSummary* MathSqrtInstr::MakeLocationSummary() const { 3570 LocationSummary* MathSqrtInstr::MakeLocationSummary() const {
3323 const intptr_t kNumInputs = 1; 3571 const intptr_t kNumInputs = 1;
3324 const intptr_t kNumTemps = 0; 3572 const intptr_t kNumTemps = 0;
3325 LocationSummary* summary = 3573 LocationSummary* summary =
3326 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3574 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3327 summary->set_in(0, Location::RequiresFpuRegister()); 3575 summary->set_in(0, Location::RequiresFpuRegister());
3328 summary->set_out(Location::RequiresFpuRegister()); 3576 summary->set_out(Location::RequiresFpuRegister());
3329 return summary; 3577 return summary;
3330 } 3578 }
3331 3579
(...skipping 1094 matching lines...) Expand 10 before | Expand all | Expand 10 after
4426 PcDescriptors::kOther, 4674 PcDescriptors::kOther,
4427 locs()); 4675 locs());
4428 __ Drop(2); // Discard type arguments and receiver. 4676 __ Drop(2); // Discard type arguments and receiver.
4429 } 4677 }
4430 4678
4431 } // namespace dart 4679 } // namespace dart
4432 4680
4433 #undef __ 4681 #undef __
4434 4682
4435 #endif // defined TARGET_ARCH_IA32 4683 #endif // defined TARGET_ARCH_IA32
OLDNEW
« no previous file with comments | « runtime/vm/intermediate_language_arm.cc ('k') | runtime/vm/intermediate_language_mips.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698