| OLD | NEW |
| 1 | 1 |
| 2 /* | 2 /* |
| 3 * Copyright 2006 The Android Open Source Project | 3 * Copyright 2006 The Android Open Source Project |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license that can be | 5 * Use of this source code is governed by a BSD-style license that can be |
| 6 * found in the LICENSE file. | 6 * found in the LICENSE file. |
| 7 */ | 7 */ |
| 8 | 8 |
| 9 | 9 |
| 10 #include "SkBlurMask.h" | 10 #include "SkBlurMask.h" |
| (...skipping 394 matching lines...) | |
| 405 } | 405 } |
| 406 | 406 |
| 407 static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius) | 407 static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius) |
| 408 { | 408 { |
| 409 *loRadius = *hiRadius = SkScalarCeil(passRadius); | 409 *loRadius = *hiRadius = SkScalarCeil(passRadius); |
| 410 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) { | 410 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) { |
| 411 *loRadius = *hiRadius - 1; | 411 *loRadius = *hiRadius - 1; |
| 412 } | 412 } |
| 413 } | 413 } |
| 414 | 414 |
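A worked example of the radius split above (the input values are ours, chosen for illustration): for a fractional pass radius, the two box radii are picked so that alternating passes average out near the requested radius.

```cpp
// Sketch: what get_adjusted_radii() produces for two sample inputs.
int lo, hi;
get_adjusted_radii(SkFloatToScalar(2.3f), &lo, &hi);
// SkScalarCeil(2.3) == 3 and 3 - 2.3 = 0.7 > 0.5, so lo == 2, hi == 3;
// mixing 2-wide and 3-wide box passes lands near the requested 2.3.
get_adjusted_radii(SkFloatToScalar(2.8f), &lo, &hi);
// 3 - 2.8 = 0.2 <= 0.5, so both radii stay at 3: lo == 3, hi == 3.
```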
| 415 // Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows, | |
| 416 // breakeven on Mac, and ~15% slowdown on Linux. | |
| 417 // Reading a word at a time when building the sum buffer seems to give | |
| 418 // us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux. | |
| 419 #if defined(SK_BUILD_FOR_WIN32) | |
| 420 #define UNROLL_KERNEL_LOOP 1 | |
| 421 #endif | |
| 422 | |
| 423 /** The sum buffer is an array of u32 to hold the accumulated sum of all of the | |
| 424 src values at their position, plus all values above and to the left. | |
| 425 When we sample into this buffer, we need an initial row and column of 0s, | |
| 426 so we have an index correspondence as follows: | |
| 427 | |
| 428 src[i, j] == sum[i+1, j+1] | |
| 429 sum[0, j] == sum[i, 0] == 0 | |
| 430 | |
| 431 We assume that the sum buffer's stride == its width | |
| 432 */ | |
| 433 static void build_sum_buffer(uint32_t sum[], int srcW, int srcH, | |
| 434 const uint8_t src[], int srcRB) { | |
| 435 int sumW = srcW + 1; | |
| 436 | |
| 437 SkASSERT(srcRB >= srcW); | |
| 438 // mod srcRB so we can apply it after each row | |
| 439 srcRB -= srcW; | |
| 440 | |
| 441 int x, y; | |
| 442 | |
| 443 // zero out the top row and column | |
| 444 memset(sum, 0, sumW * sizeof(sum[0])); | |
| 445 sum += sumW; | |
| 446 | |
| 447 // special case first row | |
| 448 uint32_t X = 0; | |
| 449 *sum++ = 0; // initialize the first column to 0 | |
| 450 for (x = srcW - 1; x >= 0; --x) { | |
| 451 X = *src++ + X; | |
| 452 *sum++ = X; | |
| 453 } | |
| 454 src += srcRB; | |
| 455 | |
| 456 // now do the rest of the rows | |
| 457 for (y = srcH - 1; y > 0; --y) { | |
| 458 uint32_t L = 0; | |
| 459 uint32_t C = 0; | |
| 460 *sum++ = 0; // initialize the first column to 0 | |
| 461 | |
| 462 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) { | |
| 463 uint32_t T = sum[-sumW]; | |
| 464 X = *src++ + L + T - C; | |
| 465 *sum++ = X; | |
| 466 L = X; | |
| 467 C = T; | |
| 468 } | |
| 469 | |
| 470 for (; x >= 4; x-=4) { | |
| 471 uint32_t T = sum[-sumW]; | |
| 472 X = *src++ + L + T - C; | |
| 473 *sum++ = X; | |
| 474 L = X; | |
| 475 C = T; | |
| 476 T = sum[-sumW]; | |
| 477 X = *src++ + L + T - C; | |
| 478 *sum++ = X; | |
| 479 L = X; | |
| 480 C = T; | |
| 481 T = sum[-sumW]; | |
| 482 X = *src++ + L + T - C; | |
| 483 *sum++ = X; | |
| 484 L = X; | |
| 485 C = T; | |
| 486 T = sum[-sumW]; | |
| 487 X = *src++ + L + T - C; | |
| 488 *sum++ = X; | |
| 489 L = X; | |
| 490 C = T; | |
| 491 } | |
| 492 | |
| 493 for (; x >= 0; --x) { | |
| 494 uint32_t T = sum[-sumW]; | |
| 495 X = *src++ + L + T - C; | |
| 496 *sum++ = X; | |
| 497 L = X; | |
| 498 C = T; | |
| 499 } | |
| 500 src += srcRB; | |
| 501 } | |
| 502 } | |
| 503 | |
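For reference, the point of this layout is that any axis-aligned box sum over src collapses to four table lookups. A minimal sketch of that query under the index correspondence documented above (the helper name is ours, not part of this file):

```cpp
// Sum of src[x0..x1, y0..y1], inclusive, via the summed-area table.
// sum has stride sumW == srcW + 1 and a zero first row and column, so
// src[x][y] corresponds to sum[(y + 1)*sumW + (x + 1)].
static uint32_t box_sum(const uint32_t sum[], int sumW,
                        int x0, int y0, int x1, int y1) {
    const uint32_t* top = sum + y0 * sumW;        // row just above the box
    const uint32_t* bot = sum + (y1 + 1) * sumW;  // last row inside the box
    return bot[x1 + 1] - bot[x0] - top[x1 + 1] + top[x0];
}
```

This is exactly the `sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny]` pattern in apply_kernel(), with px = x0, nx = x1 + 1 and the y terms pre-multiplied by the stride.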
| 504 /** | |
| 505 * This is the path for apply_kernel() to be taken when the kernel | |
| 506 * is wider than the source image. | |
| 507 */ | |
| 508 static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[], | |
| 509 int sw, int sh) { | |
| 510 SkASSERT(2*rx > sw); | |
| 511 | |
| 512 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1)); | |
| 513 | |
| 514 int sumStride = sw + 1; | |
| 515 | |
| 516 int dw = sw + 2*rx; | |
| 517 int dh = sh + 2*ry; | |
| 518 | |
| 519 int prev_y = -2*ry; | |
| 520 int next_y = 1; | |
| 521 | |
| 522 for (int y = 0; y < dh; ++y) { | |
| 523 int py = SkClampPos(prev_y) * sumStride; | |
| 524 int ny = SkFastMin32(next_y, sh) * sumStride; | |
| 525 | |
| 526 int prev_x = -2*rx; | |
| 527 int next_x = 1; | |
| 528 | |
| 529 for (int x = 0; x < dw; ++x) { | |
| 530 int px = SkClampPos(prev_x); | |
| 531 int nx = SkFastMin32(next_x, sw); | |
| 532 | |
| 533 // TODO: should we be adding 1/2 (1 << 23) to round to the | |
| 534 // nearest integer here? | |
| 535 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny]; | |
| 536 *dst++ = SkToU8(tmp * scale >> 24); | |
| 537 | |
| 538 prev_x += 1; | |
| 539 next_x += 1; | |
| 540 } | |
| 541 | |
| 542 prev_y += 1; | |
| 543 next_y += 1; | |
| 544 } | |
| 545 } | |
| 546 /** | |
| 547 * sw and sh are the width and height of the src. Since the sum buffer | |
| 548 * matches that, but has an extra row and col at the beginning (with zeros), | |
| 549 * we can just use sw and sh as our "max" values for pinning coordinates | |
| 550 * when sampling into sum[][] | |
| 551 * | |
| 552 * The inner loop is conceptually simple; we break it into several sections | |
| 553 * to improve performance. Here's the original version: | |
| 554 for (int x = 0; x < dw; ++x) { | |
| 555 int px = SkClampPos(prev_x); | |
| 556 int nx = SkFastMin32(next_x, sw); | |
| 557 | |
| 558 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny]; | |
| 559 *dst++ = SkToU8(tmp * scale >> 24); | |
| 560 | |
| 561 prev_x += 1; | |
| 562 next_x += 1; | |
| 563 } | |
| 564 * The sections are: | |
| 565 * left-hand section, where prev_x is clamped to 0 | |
| 566 * center section, where neither prev_x nor next_x is clamped | |
| 567 * right-hand section, where next_x is clamped to sw | |
| 568 * On some operating systems, the center section is unrolled for additional | |
| 569 * speedup. | |
| 570 */ | |
| 571 static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[], | |
| 572 int sw, int sh) { | |
| 573 if (2*rx > sw) { | |
| 574 kernel_clamped(dst, rx, ry, sum, sw, sh); | |
| 575 return; | |
| 576 } | |
| 577 | |
| 578 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1)); | |
| 579 | |
| 580 int sumStride = sw + 1; | |
| 581 | |
| 582 int dw = sw + 2*rx; | |
| 583 int dh = sh + 2*ry; | |
| 584 | |
| 585 int prev_y = -2*ry; | |
| 586 int next_y = 1; | |
| 587 | |
| 588 SkASSERT(2*rx <= dw - 2*rx); | |
| 589 | |
| 590 for (int y = 0; y < dh; ++y) { | |
| 591 int py = SkClampPos(prev_y) * sumStride; | |
| 592 int ny = SkFastMin32(next_y, sh) * sumStride; | |
| 593 | |
| 594 int prev_x = -2*rx; | |
| 595 int next_x = 1; | |
| 596 int x = 0; | |
| 597 | |
| 598 for (; x < 2*rx; ++x) { | |
| 599 SkASSERT(prev_x <= 0); | |
| 600 SkASSERT(next_x <= sw); | |
| 601 | |
| 602 int px = 0; | |
| 603 int nx = next_x; | |
| 604 | |
| 605 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny]; | |
| 606 *dst++ = SkToU8(tmp * scale >> 24); | |
| 607 | |
| 608 prev_x += 1; | |
| 609 next_x += 1; | |
| 610 } | |
| 611 | |
| 612 int i0 = prev_x + py; | |
| 613 int i1 = next_x + ny; | |
| 614 int i2 = next_x + py; | |
| 615 int i3 = prev_x + ny; | |
| 616 | |
| 617 #if UNROLL_KERNEL_LOOP | |
| 618 for (; x < dw - 2*rx - 4; x += 4) { | |
| 619 SkASSERT(prev_x >= 0); | |
| 620 SkASSERT(next_x <= sw); | |
| 621 | |
| 622 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
| 623 *dst++ = SkToU8(tmp * scale >> 24); | |
| 624 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
| 625 *dst++ = SkToU8(tmp * scale >> 24); | |
| 626 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
| 627 *dst++ = SkToU8(tmp * scale >> 24); | |
| 628 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
| 629 *dst++ = SkToU8(tmp * scale >> 24); | |
| 630 | |
| 631 prev_x += 4; | |
| 632 next_x += 4; | |
| 633 } | |
| 634 #endif | |
| 635 | |
| 636 for (; x < dw - 2*rx; ++x) { | |
| 637 SkASSERT(prev_x >= 0); | |
| 638 SkASSERT(next_x <= sw); | |
| 639 | |
| 640 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
| 641 *dst++ = SkToU8(tmp * scale >> 24); | |
| 642 | |
| 643 prev_x += 1; | |
| 644 next_x += 1; | |
| 645 } | |
| 646 | |
| 647 for (; x < dw; ++x) { | |
| 648 SkASSERT(prev_x >= 0); | |
| 649 SkASSERT(next_x > sw); | |
| 650 | |
| 651 int px = prev_x; | |
| 652 int nx = sw; | |
| 653 | |
| 654 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny]; | |
| 655 *dst++ = SkToU8(tmp * scale >> 24); | |
| 656 | |
| 657 prev_x += 1; | |
| 658 next_x += 1; | |
| 659 } | |
| 660 | |
| 661 prev_y += 1; | |
| 662 next_y += 1; | |
| 663 } | |
| 664 } | |
| 665 | |
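A quick check of the fixed-point `scale` used above, with example values of ours: multiplying by (1 << 24) / area and shifting right by 24 approximates division by the kernel area, and the truncation in `scale` is what the rounding TODO in kernel_clamped() is about.

```cpp
// Fixed-point average: scale = (1 << 24) / area, then multiply and shift.
// For rx = ry = 2 the kernel is 5x5, area = 25, scale = 671088.
uint32_t area  = (2*2 + 1) * (2*2 + 1);      // 25
uint32_t scale = (1 << 24) / area;           // 671088 (truncated from 671088.64)
uint32_t tmp   = 25 * 255;                   // box sum of 25 fully opaque pixels
uint8_t  dst   = SkToU8(tmp * scale >> 24);  // 254, not 255: the truncated
                                             // scale loses the last ulp
```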
| 666 /** | |
| 667 * This is the path for apply_kernel_interp() to be taken when the kernel | |
| 668 * is wider than the source image. | |
| 669 */ | |
| 670 static void kernel_interp_clamped(uint8_t dst[], int rx, int ry, | |
| 671 const uint32_t sum[], int sw, int sh, U8CPU outerWeight) { | |
| 672 SkASSERT(2*rx > sw); | |
| 673 | |
| 674 int innerWeight = 255 - outerWeight; | |
| 675 | |
| 676 // round these guys up if they're bigger than 127 | |
| 677 outerWeight += outerWeight >> 7; | |
| 678 innerWeight += innerWeight >> 7; | |
| 679 | |
| 680 uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1)); | |
| 681 uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1)); | |
| 682 | |
| 683 int sumStride = sw + 1; | |
| 684 | |
| 685 int dw = sw + 2*rx; | |
| 686 int dh = sh + 2*ry; | |
| 687 | |
| 688 int prev_y = -2*ry; | |
| 689 int next_y = 1; | |
| 690 | |
| 691 for (int y = 0; y < dh; ++y) { | |
| 692 int py = SkClampPos(prev_y) * sumStride; | |
| 693 int ny = SkFastMin32(next_y, sh) * sumStride; | |
| 694 | |
| 695 int ipy = SkClampPos(prev_y + 1) * sumStride; | |
| 696 int iny = SkClampMax(next_y - 1, sh) * sumStride; | |
| 697 | |
| 698 int prev_x = -2*rx; | |
| 699 int next_x = 1; | |
| 700 | |
| 701 for (int x = 0; x < dw; ++x) { | |
| 702 int px = SkClampPos(prev_x); | |
| 703 int nx = SkFastMin32(next_x, sw); | |
| 704 | |
| 705 int ipx = SkClampPos(prev_x + 1); | |
| 706 int inx = SkClampMax(next_x - 1, sw); | |
| 707 | |
| 708 uint32_t outerSum = sum[px+py] + sum[nx+ny] | |
| 709 - sum[nx+py] - sum[px+ny]; | |
| 710 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny] | |
| 711 - sum[inx+ipy] - sum[ipx+iny]; | |
| 712 *dst++ = SkToU8((outerSum * outerScale | |
| 713 + innerSum * innerScale) >> 24); | |
| 714 | |
| 715 prev_x += 1; | |
| 716 next_x += 1; | |
| 717 } | |
| 718 prev_y += 1; | |
| 719 next_y += 1; | |
| 720 } | |
| 721 } | |
| 722 | |
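A short sketch of the weight fixup above, with example values of ours: bumping any weight of 128 or more by one makes the outer/inner pair sum to 256, so the two scales exactly partition the 1 << 24 fixed-point total that the final >> 24 expects.

```cpp
// Example with outerWeight = 192, so innerWeight = 255 - 192 = 63.
// Adding (w >> 7) rounds weights above 127 up: 193 + 63 == 256.
int rx = 3, ry = 3;
int outerWeight = 192 + (192 >> 7);  // 193
int innerWeight =  63 + ( 63 >> 7);  //  63
uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1)); // 7x7 box
uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1)); // 5x5 box
// (193 + 63) << 16 == 1 << 24, matching apply_kernel()'s >> 24 shift.
```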
| 723 /** | |
| 724 * sw and sh are the width and height of the src. Since the sum buffer | |
| 725 * matches that, but has an extra row and col at the beginning (with zeros), | |
| 726 * we can just use sw and sh as our "max" values for pinning coordinates | |
| 727 * when sampling into sum[][] | |
| 728 * | |
| 729 * The inner loop is conceptually simple; we break it into several variants | |
| 730 * to improve performance. Here's the original version: | |
| 731 for (int x = 0; x < dw; ++x) { | |
| 732 int px = SkClampPos(prev_x); | |
| 733 int nx = SkFastMin32(next_x, sw); | |
| 734 | |
| 735 int ipx = SkClampPos(prev_x + 1); | |
| 736 int inx = SkClampMax(next_x - 1, sw); | |
| 737 | |
| 738 uint32_t outerSum = sum[px+py] + sum[nx+ny] | |
| 739 - sum[nx+py] - sum[px+ny]; | |
| 740 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny] | |
| 741 - sum[inx+ipy] - sum[ipx+iny]; | |
| 742 *dst++ = SkToU8((outerSum * outerScale | |
| 743 + innerSum * innerScale) >> 24); | |
| 744 | |
| 745 prev_x += 1; | |
| 746 next_x += 1; | |
| 747 } | |
| 748 * The sections are: | |
| 749 * left-hand section, where prev_x is clamped to 0 | |
| 750 * center section, where neither prev_x nor next_x is clamped | |
| 751 * right-hand section, where next_x is clamped to sw | |
| 752 * On some operating systems, the center section is unrolled for additional | |
| 753 * speedup. | |
| 754 */ | |
| 755 static void apply_kernel_interp(uint8_t dst[], int rx, int ry, | |
| 756 const uint32_t sum[], int sw, int sh, U8CPU outerWeight) { | |
| 757 SkASSERT(rx > 0 && ry > 0); | |
| 758 SkASSERT(outerWeight <= 255); | |
| 759 | |
| 760 if (2*rx > sw) { | |
| 761 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outerWeight); | |
| 762 return; | |
| 763 } | |
| 764 | |
| 765 int innerWeight = 255 - outerWeight; | |
| 766 | |
| 767 // round these guys up if they're bigger than 127 | |
| 768 outerWeight += outerWeight >> 7; | |
| 769 innerWeight += innerWeight >> 7; | |
| 770 | |
| 771 uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1)); | |
| 772 uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1)); | |
| 773 | |
| 774 int sumStride = sw + 1; | |
| 775 | |
| 776 int dw = sw + 2*rx; | |
| 777 int dh = sh + 2*ry; | |
| 778 | |
| 779 int prev_y = -2*ry; | |
| 780 int next_y = 1; | |
| 781 | |
| 782 SkASSERT(2*rx <= dw - 2*rx); | |
| 783 | |
| 784 for (int y = 0; y < dh; ++y) { | |
| 785 int py = SkClampPos(prev_y) * sumStride; | |
| 786 int ny = SkFastMin32(next_y, sh) * sumStride; | |
| 787 | |
| 788 int ipy = SkClampPos(prev_y + 1) * sumStride; | |
| 789 int iny = SkClampMax(next_y - 1, sh) * sumStride; | |
| 790 | |
| 791 int prev_x = -2*rx; | |
| 792 int next_x = 1; | |
| 793 int x = 0; | |
| 794 | |
| 795 for (; x < 2*rx; ++x) { | |
| 796 SkASSERT(prev_x < 0); | |
| 797 SkASSERT(next_x <= sw); | |
| 798 | |
| 799 int px = 0; | |
| 800 int nx = next_x; | |
| 801 | |
| 802 int ipx = 0; | |
| 803 int inx = next_x - 1; | |
| 804 | |
| 805 uint32_t outerSum = sum[px+py] + sum[nx+ny] | |
| 806 - sum[nx+py] - sum[px+ny]; | |
| 807 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny] | |
| 808 - sum[inx+ipy] - sum[ipx+iny]; | |
| 809 *dst++ = SkToU8((outerSum * outerScale | |
| 810 + innerSum * innerScale) >> 24); | |
| 811 | |
| 812 prev_x += 1; | |
| 813 next_x += 1; | |
| 814 } | |
| 815 | |
| 816 int i0 = prev_x + py; | |
| 817 int i1 = next_x + ny; | |
| 818 int i2 = next_x + py; | |
| 819 int i3 = prev_x + ny; | |
| 820 int i4 = prev_x + 1 + ipy; | |
| 821 int i5 = next_x - 1 + iny; | |
| 822 int i6 = next_x - 1 + ipy; | |
| 823 int i7 = prev_x + 1 + iny; | |
| 824 | |
| 825 #if UNROLL_KERNEL_LOOP | |
| 826 for (; x < dw - 2*rx - 4; x += 4) { | |
| 827 SkASSERT(prev_x >= 0); | |
| 828 SkASSERT(next_x <= sw); | |
| 829 | |
| 830 uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
| 831 uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; | |
| 832 *dst++ = SkToU8((outerSum * outerScale | |
| 833 + innerSum * innerScale) >> 24); | |
| 834 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
| 835 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; | |
| 836 *dst++ = SkToU8((outerSum * outerScale | |
| 837 + innerSum * innerScale) >> 24); | |
| 838 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
| 839 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; | |
| 840 *dst++ = SkToU8((outerSum * outerScale | |
| 841 + innerSum * innerScale) >> 24); | |
| 842 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
| 843 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; | |
| 844 *dst++ = SkToU8((outerSum * outerScale | |
| 845 + innerSum * innerScale) >> 24); | |
| 846 | |
| 847 prev_x += 4; | |
| 848 next_x += 4; | |
| 849 } | |
| 850 #endif | |
| 851 | |
| 852 for (; x < dw - 2*rx; ++x) { | |
| 853 SkASSERT(prev_x >= 0); | |
| 854 SkASSERT(next_x <= sw); | |
| 855 | |
| 856 uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
| 857 uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; | |
| 858 *dst++ = SkToU8((outerSum * outerScale | |
| 859 + innerSum * innerScale) >> 24); | |
| 860 | |
| 861 prev_x += 1; | |
| 862 next_x += 1; | |
| 863 } | |
| 864 | |
| 865 for (; x < dw; ++x) { | |
| 866 SkASSERT(prev_x >= 0); | |
| 867 SkASSERT(next_x > sw); | |
| 868 | |
| 869 int px = prev_x; | |
| 870 int nx = sw; | |
| 871 | |
| 872 int ipx = prev_x + 1; | |
| 873 int inx = sw; | |
| 874 | |
| 875 uint32_t outerSum = sum[px+py] + sum[nx+ny] | |
| 876 - sum[nx+py] - sum[px+ny]; | |
| 877 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny] | |
| 878 - sum[inx+ipy] - sum[ipx+iny]; | |
| 879 *dst++ = SkToU8((outerSum * outerScale | |
| 880 + innerSum * innerScale) >> 24); | |
| 881 | |
| 882 prev_x += 1; | |
| 883 next_x += 1; | |
| 884 } | |
| 885 | |
| 886 prev_y += 1; | |
| 887 next_y += 1; | |
| 888 } | |
| 889 } | |
| 890 | |
| 891 #include "SkColorPriv.h" | 415 #include "SkColorPriv.h" |
| 892 | 416 |
| 893 static void merge_src_with_blur(uint8_t dst[], int dstRB, | 417 static void merge_src_with_blur(uint8_t dst[], int dstRB, |
| 894 const uint8_t src[], int srcRB, | 418 const uint8_t src[], int srcRB, |
| 895 const uint8_t blur[], int blurRB, | 419 const uint8_t blur[], int blurRB, |
| 896 int sw, int sh) { | 420 int sw, int sh) { |
| 897 dstRB -= sw; | 421 dstRB -= sw; |
| 898 srcRB -= sw; | 422 srcRB -= sw; |
| 899 blurRB -= sw; | 423 blurRB -= sw; |
| 900 while (--sh >= 0) { | 424 while (--sh >= 0) { |
| (...skipping 47 matching lines...) | |
| 948 | 472 |
| 949 // we use a local function to wrap the class static method to work around | 473 // we use a local function to wrap the class static method to work around |
| 950 // a bug in gcc98 | 474 // a bug in gcc98 |
| 951 void SkMask_FreeImage(uint8_t* image); | 475 void SkMask_FreeImage(uint8_t* image); |
| 952 void SkMask_FreeImage(uint8_t* image) { | 476 void SkMask_FreeImage(uint8_t* image) { |
| 953 SkMask::FreeImage(image); | 477 SkMask::FreeImage(image); |
| 954 } | 478 } |
| 955 | 479 |
| 956 bool SkBlurMask::Blur(SkMask* dst, const SkMask& src, | 480 bool SkBlurMask::Blur(SkMask* dst, const SkMask& src, |
| 957 SkScalar radius, Style style, Quality quality, | 481 SkScalar radius, Style style, Quality quality, |
| 958 SkIPoint* margin, bool separable) | 482 SkIPoint* margin) |
| 959 { | 483 { |
| 960 | 484 |
| 961 if (src.fFormat != SkMask::kA8_Format) { | 485 if (src.fFormat != SkMask::kA8_Format) { |
| 962 return false; | 486 return false; |
| 963 } | 487 } |
| 964 | 488 |
| 965 // Force high quality off for small radii (performance) | 489 // Force high quality off for small radii (performance) |
| 966 if (radius < SkIntToScalar(3)) { | 490 if (radius < SkIntToScalar(3)) { |
| 967 quality = kLow_Quality; | 491 quality = kLow_Quality; |
| 968 } | 492 } |
| (...skipping 35 matching lines...) | |
| 1004 return false; // too big to allocate, abort | 528 return false; // too big to allocate, abort |
| 1005 } | 529 } |
| 1006 | 530 |
| 1007 int sw = src.fBounds.width(); | 531 int sw = src.fBounds.width(); |
| 1008 int sh = src.fBounds.height(); | 532 int sh = src.fBounds.height(); |
| 1009 const uint8_t* sp = src.fImage; | 533 const uint8_t* sp = src.fImage; |
| 1010 uint8_t* dp = SkMask::AllocImage(dstSize); | 534 uint8_t* dp = SkMask::AllocImage(dstSize); |
| 1011 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp); | 535 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp); |
| 1012 | 536 |
| 1013 // build the blurry destination | 537 // build the blurry destination |
| 1014 if (separable) { | 538 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); |
| 1015 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); | 539 uint8_t* tp = tmpBuffer.get(); |
| 1016 uint8_t* tp = tmpBuffer.get(); | 540 int w = sw, h = sh; |
| 1017 int w = sw, h = sh; | |
| 1018 | 541 |
| 1019 if (outerWeight == 255) { | 542 if (outerWeight == 255) { |
| 1020 int loRadius, hiRadius; | 543 int loRadius, hiRadius; |
| 1021 get_adjusted_radii(passRadius, &loRadius, &hiRadius); | 544 get_adjusted_radii(passRadius, &loRadius, &hiRadius); |
| 1022 if (kHigh_Quality == quality) { | 545 if (kHigh_Quality == quality) { |
| 1023 // Do three X blurs, with a transpose on the final one. | 546 // Do three X blurs, with a transpose on the final one. |
| 1024 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false); | 547 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false); |
| 1025 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false); | 548 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false); |
| 1026 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true); | 549 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true); |
| 1027 // Do three Y blurs, with a transpose on the final one. | 550 // Do three Y blurs, with a transpose on the final one. |
| 1028 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false); | 551 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false); |
| 1029 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false); | 552 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false); |
| 1030 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true); | 553 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true); |
| 1031 } else { | |
| 1032 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true); | |
| 1033 h = boxBlur(tp, h, dp, ry, ry, h, w, true); | |
| 1034 } | |
| 1035 } else { | 554 } else { |
| 1036 if (kHigh_Quality == quality) { | 555 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true); |
| 1037 // Do three X blurs, with a transpose on the final one. | 556 h = boxBlur(tp, h, dp, ry, ry, h, w, true); |
| 1038 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight); | |
| 1039 w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight); | |
| 1040 w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight); | |
| 1041 // Do three Y blurs, with a transpose on the final one. | |
| 1042 h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight); | |
| 1043 h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight); | |
| 1044 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); | |
| 1045 } else { | |
| 1046 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight); | |
| 1047 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); | |
| 1048 } | |
| 1049 } | 557 } |
| 1050 } else { | 558 } else { |
| 1051 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1; | 559 if (kHigh_Quality == quality) { |
| 1052 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1; | 560 // Do three X blurs, with a transpose on the final one. |
| 1053 SkAutoTMalloc<uint32_t> storage(storageW * storageH); | 561 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight); |
| 1054 uint32_t* sumBuffer = storage.get(); | 562 w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight); |
| 1055 | 563 w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight); |
| 1056 //pass1: sp is source, dp is destination | 564 // Do three Y blurs, with a transpose on the final one. |
| 1057 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes); | 565 h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight); |
| 1058 if (outerWeight == 255) { | 566 h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight); |
| 1059 apply_kernel(dp, rx, ry, sumBuffer, sw, sh); | 567 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); |
| 1060 } else { | 568 } else { |
| 1061 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outerWeight); | 569 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight); |
| 1062 } | 570 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); |
| 1063 | |
| 1064 if (kHigh_Quality == quality) { | |
| 1065 //pass2: dp is source, tmpBuffer is destination | |
| 1066 int tmp_sw = sw + 2 * rx; | |
| 1067 int tmp_sh = sh + 2 * ry; | |
| 1068 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); | |
| 1069 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw); | |
| 1070 if (outerWeight == 255) | |
| 1071 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh); | |
| 1072 else | |
| 1073 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer, | |
| 1074 tmp_sw, tmp_sh, outerWeight); | |
| 1075 | |
| 1076 //pass3: tmpBuffer is source, dp is destination | |
| 1077 tmp_sw += 2 * rx; | |
| 1078 tmp_sh += 2 * ry; | |
| 1079 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw); | |
| 1080 if (outerWeight == 255) | |
| 1081 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh); | |
| 1082 else | |
| 1083 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh, | |
| 1084 outerWeight); | |
| 1085 } | 571 } |
| 1086 } | 572 } |
| 1087 | 573 |
| 1088 dst->fImage = dp; | 574 dst->fImage = dp; |
| 1089 // if need be, alloc the "real" dst (same size as src) and copy/merge | 575 // if need be, alloc the "real" dst (same size as src) and copy/merge |
| 1090 // the blur into it (applying the src) | 576 // the blur into it (applying the src) |
| 1091 if (style == kInner_Style) { | 577 if (style == kInner_Style) { |
| 1092 // now we allocate the "real" dst, mirror the size of src | 578 // now we allocate the "real" dst, mirror the size of src |
| 1093 size_t srcSize = src.computeImageSize(); | 579 size_t srcSize = src.computeImageSize(); |
| 1094 if (0 == srcSize) { | 580 if (0 == srcSize) { |
| (...skipping 13 matching lines...) | |
| 1108 } | 594 } |
| 1109 | 595 |
| 1110 if (style == kInner_Style) { | 596 if (style == kInner_Style) { |
| 1111 dst->fBounds = src.fBounds; // restore trimmed bounds | 597 dst->fBounds = src.fBounds; // restore trimmed bounds |
| 1112 dst->fRowBytes = src.fRowBytes; | 598 dst->fRowBytes = src.fRowBytes; |
| 1113 } | 599 } |
| 1114 | 600 |
| 1115 return true; | 601 return true; |
| 1116 } | 602 } |
| 1117 | 603 |
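For intuition about the separable path that survives this change: each boxBlur() pass is a running-sum box filter, and three passes per axis approximate a Gaussian (see the piecewise-quadratic derivation below). A simplified 1D reference of what a single pass computes, ignoring the transpose flag and the lo/hi radius split (our own helper sketch, not the diff's boxBlur()):

```cpp
// One 1D box-blur pass of radius r over a row of width w.
// Output width is w + 2*r; each output is the average of the (2*r + 1)
// source samples under the window, treating samples outside src as zero.
static void box_blur_1d(const uint8_t src[], int w, uint8_t dst[], int r) {
    uint32_t scale = (1 << 24) / (2*r + 1);
    uint32_t sum = 0;
    for (int x = 0; x < w + 2*r; ++x) {
        if (x < w)        sum += src[x];            // leading edge enters
        if (x >= 2*r + 1) sum -= src[x - 2*r - 1];  // trailing edge leaves
        dst[x] = SkToU8(sum * scale >> 24);
    }
}
```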
| 1118 bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src, | |
| 1119 SkScalar radius, Style style, Quality quality, | |
| 1120 SkIPoint* margin) | |
| 1121 { | |
| 1122 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true); | |
| 1123 } | |
| 1124 | |
| 1125 bool SkBlurMask::Blur(SkMask* dst, const SkMask& src, | |
| 1126 SkScalar radius, Style style, Quality quality, | |
| 1127 SkIPoint* margin) | |
| 1128 { | |
| 1129 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false); | |
| 1130 } | |
| 1131 | |
| 1132 /* Convolving a box with itself three times results in a piecewise | 604 /* Convolving a box with itself three times results in a piecewise |
| 1133 quadratic function: | 605 quadratic function: |
| 1134 | 606 |
| 1135 0 x <= -1.5 | 607 0 x <= -1.5 |
| 1136 9/8 + 3/2 x + 1/2 x^2 -1.5 < x <= -.5 | 608 9/8 + 3/2 x + 1/2 x^2 -1.5 < x <= -.5 |
| 1137 3/4 - x^2 -.5 < x <= .5 | 609 3/4 - x^2 -.5 < x <= .5 |
| 1138 9/8 - 3/2 x + 1/2 x^2 0.5 < x <= 1.5 | 610 9/8 - 3/2 x + 1/2 x^2 0.5 < x <= 1.5 |
| 1139 0 1.5 < x | 611 0 1.5 < x |
| 1140 | 612 |
| 1141 Mathematica: | 613 Mathematica: |
| (...skipping 368 matching lines...) | |
| 1510 (void)autoCall.detach(); | 982 (void)autoCall.detach(); |
| 1511 } | 983 } |
| 1512 | 984 |
| 1513 if (style == kInner_Style) { | 985 if (style == kInner_Style) { |
| 1514 dst->fBounds = src.fBounds; // restore trimmed bounds | 986 dst->fBounds = src.fBounds; // restore trimmed bounds |
| 1515 dst->fRowBytes = src.fRowBytes; | 987 dst->fRowBytes = src.fRowBytes; |
| 1516 } | 988 } |
| 1517 | 989 |
| 1518 return true; | 990 return true; |
| 1519 } | 991 } |
| OLD | NEW |