OLD | NEW |
1 | 1 |
2 /* | 2 /* |
3 * Copyright 2006 The Android Open Source Project | 3 * Copyright 2006 The Android Open Source Project |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license that can be | 5 * Use of this source code is governed by a BSD-style license that can be |
6 * found in the LICENSE file. | 6 * found in the LICENSE file. |
7 */ | 7 */ |
8 | 8 |
9 | 9 |
10 #include "SkBlurMask.h" | 10 #include "SkBlurMask.h" |
(...skipping 394 matching lines...)
405 } | 405 } |
406 | 406 |
407 static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius) | 407 static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius) |
408 { | 408 { |
409 *loRadius = *hiRadius = SkScalarCeil(passRadius); | 409 *loRadius = *hiRadius = SkScalarCeil(passRadius); |
410 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) { | 410 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) { |
411 *loRadius = *hiRadius - 1; | 411 *loRadius = *hiRadius - 1; |
412 } | 412 } |
413 } | 413 } |
414 | 414 |
415 // Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows, | |
416 // breakeven on Mac, and ~15% slowdown on Linux. | |
417 // Reading a word at a time when building the sum buffer seems to give | |
418 // us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux. | |
419 #if defined(SK_BUILD_FOR_WIN32) | |
420 #define UNROLL_KERNEL_LOOP 1 | |
421 #endif | |
422 | |
423 /** The sum buffer is an array of u32 to hold the accumulated sum of all of the | |
424 src values at their position, plus all values above and to the left. | |
425 When we sample into this buffer, we need an initial row and column of 0s, | |
426 so we have an index correspondence as follows: | |
427 | |
428 src[i, j] == sum[i+1, j+1] | |
429 sum[0, j] == sum[i, 0] == 0 | |
430 | |
431 We assume that the sum buffer's stride == its width | |
432 */ | |
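The comment above describes a summed-area table (integral image). A minimal standalone sketch of that layout, assuming a tightly packed source (stride == width); make_sum() and box_sum() are illustrative names, not functions in this file, and like the real code this assumes the total sum fits in 32 bits:

    #include <cstdint>
    #include <vector>

    // Build a (srcW+1) x (srcH+1) table with a zero top row and left column,
    // so src[x, y] corresponds to sum[x+1, y+1].
    static std::vector<uint32_t> make_sum(const uint8_t* src, int srcW, int srcH) {
        int sumW = srcW + 1;
        int sumH = srcH + 1;
        std::vector<uint32_t> sum(sumW * sumH, 0);
        for (int y = 1; y < sumH; ++y) {
            for (int x = 1; x < sumW; ++x) {
                // pixel + left + above - above-left (the overlap is counted twice)
                sum[y * sumW + x] = src[(y - 1) * srcW + (x - 1)]
                                  + sum[y * sumW + (x - 1)]
                                  + sum[(y - 1) * sumW + x]
                                  - sum[(y - 1) * sumW + (x - 1)];
            }
        }
        return sum;
    }

    // Sum of src over the half-open box [x0, x1) x [y0, y1): four lookups total.
    static uint32_t box_sum(const std::vector<uint32_t>& sum, int sumW,
                            int x0, int y0, int x1, int y1) {
        return sum[y1 * sumW + x1] - sum[y0 * sumW + x1]
             - sum[y1 * sumW + x0] + sum[y0 * sumW + x0];
    }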
433 static void build_sum_buffer(uint32_t sum[], int srcW, int srcH, | |
434 const uint8_t src[], int srcRB) { | |
435 int sumW = srcW + 1; | |
436 | |
437 SkASSERT(srcRB >= srcW); | |
438 // mod srcRB so we can apply it after each row | |
439 srcRB -= srcW; | |
440 | |
441 int x, y; | |
442 | |
443 // zero out the top row and column | |
444 memset(sum, 0, sumW * sizeof(sum[0])); | |
445 sum += sumW; | |
446 | |
447 // special case first row | |
448 uint32_t X = 0; | |
449     *sum++ = 0; // initialize the first column to 0 | |
450 for (x = srcW - 1; x >= 0; --x) { | |
451 X = *src++ + X; | |
452 *sum++ = X; | |
453 } | |
454 src += srcRB; | |
455 | |
456 // now do the rest of the rows | |
457 for (y = srcH - 1; y > 0; --y) { | |
458 uint32_t L = 0; | |
459 uint32_t C = 0; | |
460         *sum++ = 0; // initialize the first column to 0 | |
461 | |
462 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) { | |
463 uint32_t T = sum[-sumW]; | |
464 X = *src++ + L + T - C; | |
465 *sum++ = X; | |
466 L = X; | |
467 C = T; | |
468 } | |
469 | |
470 for (; x >= 4; x-=4) { | |
471 uint32_t T = sum[-sumW]; | |
472 X = *src++ + L + T - C; | |
473 *sum++ = X; | |
474 L = X; | |
475 C = T; | |
476 T = sum[-sumW]; | |
477 X = *src++ + L + T - C; | |
478 *sum++ = X; | |
479 L = X; | |
480 C = T; | |
481 T = sum[-sumW]; | |
482 X = *src++ + L + T - C; | |
483 *sum++ = X; | |
484 L = X; | |
485 C = T; | |
486 T = sum[-sumW]; | |
487 X = *src++ + L + T - C; | |
488 *sum++ = X; | |
489 L = X; | |
490 C = T; | |
491 } | |
492 | |
493 for (; x >= 0; --x) { | |
494 uint32_t T = sum[-sumW]; | |
495 X = *src++ + L + T - C; | |
496 *sum++ = X; | |
497 L = X; | |
498 C = T; | |
499 } | |
500 src += srcRB; | |
501 } | |
502 } | |
503 | |
504 /** | |
505 * This is the path for apply_kernel() to be taken when the kernel | |
506 * is wider than the source image. | |
507 */ | |
508 static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[], | |
509 int sw, int sh) { | |
510 SkASSERT(2*rx > sw); | |
511 | |
512 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1)); | |
513 | |
514 int sumStride = sw + 1; | |
515 | |
516 int dw = sw + 2*rx; | |
517 int dh = sh + 2*ry; | |
518 | |
519 int prev_y = -2*ry; | |
520 int next_y = 1; | |
521 | |
522 for (int y = 0; y < dh; ++y) { | |
523 int py = SkClampPos(prev_y) * sumStride; | |
524 int ny = SkFastMin32(next_y, sh) * sumStride; | |
525 | |
526 int prev_x = -2*rx; | |
527 int next_x = 1; | |
528 | |
529 for (int x = 0; x < dw; ++x) { | |
530 int px = SkClampPos(prev_x); | |
531 int nx = SkFastMin32(next_x, sw); | |
532 | |
533 // TODO: should we be adding 1/2 (1 << 23) to round to the | |
534 // nearest integer here? | |
535 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny]; | |
536 *dst++ = SkToU8(tmp * scale >> 24); | |
537 | |
538 prev_x += 1; | |
539 next_x += 1; | |
540 } | |
541 | |
542 prev_y += 1; | |
543 next_y += 1; | |
544 } | |
545 } | |
546 /** | |
547 * sw and sh are the width and height of the src. Since the sum buffer | |
548 * matches that, but has an extra row and col at the beginning (with zeros), | |
549 * we can just use sw and sh as our "max" values for pinning coordinates | |
550 * when sampling into sum[][] | |
551 * | |
552 * The inner loop is conceptually simple; we break it into several sections | |
553 * to improve performance. Here's the original version: | |
554 for (int x = 0; x < dw; ++x) { | |
555 int px = SkClampPos(prev_x); | |
556 int nx = SkFastMin32(next_x, sw); | |
557 | |
558 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny]; | |
559 *dst++ = SkToU8(tmp * scale >> 24); | |
560 | |
561 prev_x += 1; | |
562 next_x += 1; | |
563 } | |
564 * The sections are: | |
565 * left-hand section, where prev_x is clamped to 0 | |
566 * center section, where neither prev_x nor next_x is clamped | |
567 * right-hand section, where next_x is clamped to sw | |
568 * On some operating systems, the center section is unrolled for additional | |
569 * speedup. | |
570 */ | |
571 static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[], | |
572 int sw, int sh) { | |
573 if (2*rx > sw) { | |
574 kernel_clamped(dst, rx, ry, sum, sw, sh); | |
575 return; | |
576 } | |
577 | |
578 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1)); | |
579 | |
580 int sumStride = sw + 1; | |
581 | |
582 int dw = sw + 2*rx; | |
583 int dh = sh + 2*ry; | |
584 | |
585 int prev_y = -2*ry; | |
586 int next_y = 1; | |
587 | |
588 SkASSERT(2*rx <= dw - 2*rx); | |
589 | |
590 for (int y = 0; y < dh; ++y) { | |
591 int py = SkClampPos(prev_y) * sumStride; | |
592 int ny = SkFastMin32(next_y, sh) * sumStride; | |
593 | |
594 int prev_x = -2*rx; | |
595 int next_x = 1; | |
596 int x = 0; | |
597 | |
598 for (; x < 2*rx; ++x) { | |
599 SkASSERT(prev_x <= 0); | |
600 SkASSERT(next_x <= sw); | |
601 | |
602 int px = 0; | |
603 int nx = next_x; | |
604 | |
605 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny]; | |
606 *dst++ = SkToU8(tmp * scale >> 24); | |
607 | |
608 prev_x += 1; | |
609 next_x += 1; | |
610 } | |
611 | |
612 int i0 = prev_x + py; | |
613 int i1 = next_x + ny; | |
614 int i2 = next_x + py; | |
615 int i3 = prev_x + ny; | |
616 | |
617 #if UNROLL_KERNEL_LOOP | |
618 for (; x < dw - 2*rx - 4; x += 4) { | |
619 SkASSERT(prev_x >= 0); | |
620 SkASSERT(next_x <= sw); | |
621 | |
622 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
623 *dst++ = SkToU8(tmp * scale >> 24); | |
624 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
625 *dst++ = SkToU8(tmp * scale >> 24); | |
626 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
627 *dst++ = SkToU8(tmp * scale >> 24); | |
628 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
629 *dst++ = SkToU8(tmp * scale >> 24); | |
630 | |
631 prev_x += 4; | |
632 next_x += 4; | |
633 } | |
634 #endif | |
635 | |
636 for (; x < dw - 2*rx; ++x) { | |
637 SkASSERT(prev_x >= 0); | |
638 SkASSERT(next_x <= sw); | |
639 | |
640 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
641 *dst++ = SkToU8(tmp * scale >> 24); | |
642 | |
643 prev_x += 1; | |
644 next_x += 1; | |
645 } | |
646 | |
647 for (; x < dw; ++x) { | |
648 SkASSERT(prev_x >= 0); | |
649 SkASSERT(next_x > sw); | |
650 | |
651 int px = prev_x; | |
652 int nx = sw; | |
653 | |
654 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny]; | |
655 *dst++ = SkToU8(tmp * scale >> 24); | |
656 | |
657 prev_x += 1; | |
658 next_x += 1; | |
659 } | |
660 | |
661 prev_y += 1; | |
662 next_y += 1; | |
663 } | |
664 } | |
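Both kernel_clamped() and apply_kernel() replace the per-pixel divide by the kernel area with the precomputed scale and a 24-bit shift. A small sketch of that arithmetic under the same assumptions as the code above (8-bit pixels, kernel area >= 1); box_average() and its parameters are illustrative names, not part of this file:

    #include <cassert>
    #include <cstdint>

    // Approximate boxSum / area with one multiply and a shift, exactly as the
    // loops above do.  Like them, this truncates; adding (1 << 23) before the
    // shift would round to nearest, which is the open TODO in kernel_clamped().
    static uint8_t box_average(uint32_t boxSum, int rx, int ry) {
        uint32_t area  = (2*rx + 1) * (2*ry + 1);
        uint32_t scale = (1 << 24) / area;
        assert(boxSum <= 255 * area);     // sum of 8-bit pixels over the box
        return static_cast<uint8_t>(boxSum * scale >> 24);  // stays within uint32_t
    }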
665 | |
666 /** | |
667 * This is the path for apply_kernel_interp() to be taken when the kernel | |
668 * is wider than the source image. | |
669 */ | |
670 static void kernel_interp_clamped(uint8_t dst[], int rx, int ry, | |
671 const uint32_t sum[], int sw, int sh, U8CPU outerWeight) { | |
672 SkASSERT(2*rx > sw); | |
673 | |
674 int innerWeight = 255 - outerWeight; | |
675 | |
676 // round these guys up if they're bigger than 127 | |
677 outerWeight += outerWeight >> 7; | |
678 innerWeight += innerWeight >> 7; | |
679 | |
680 uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1)); | |
681 uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1)); | |
682 | |
683 int sumStride = sw + 1; | |
684 | |
685 int dw = sw + 2*rx; | |
686 int dh = sh + 2*ry; | |
687 | |
688 int prev_y = -2*ry; | |
689 int next_y = 1; | |
690 | |
691 for (int y = 0; y < dh; ++y) { | |
692 int py = SkClampPos(prev_y) * sumStride; | |
693 int ny = SkFastMin32(next_y, sh) * sumStride; | |
694 | |
695 int ipy = SkClampPos(prev_y + 1) * sumStride; | |
696 int iny = SkClampMax(next_y - 1, sh) * sumStride; | |
697 | |
698 int prev_x = -2*rx; | |
699 int next_x = 1; | |
700 | |
701 for (int x = 0; x < dw; ++x) { | |
702 int px = SkClampPos(prev_x); | |
703 int nx = SkFastMin32(next_x, sw); | |
704 | |
705 int ipx = SkClampPos(prev_x + 1); | |
706 int inx = SkClampMax(next_x - 1, sw); | |
707 | |
708 uint32_t outerSum = sum[px+py] + sum[nx+ny] | |
709 - sum[nx+py] - sum[px+ny]; | |
710 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny] | |
711 - sum[inx+ipy] - sum[ipx+iny]; | |
712 *dst++ = SkToU8((outerSum * outerScale | |
713 + innerSum * innerScale) >> 24); | |
714 | |
715 prev_x += 1; | |
716 next_x += 1; | |
717 } | |
718 prev_y += 1; | |
719 next_y += 1; | |
720 } | |
721 } | |
722 | |
723 /** | |
724 * sw and sh are the width and height of the src. Since the sum buffer | |
725 * matches that, but has an extra row and col at the beginning (with zeros), | |
726 * we can just use sw and sh as our "max" values for pinning coordinates | |
727 * when sampling into sum[][] | |
728 * | |
729 * The inner loop is conceptually simple; we break it into several variants | |
730 * to improve performance. Here's the original version: | |
731 for (int x = 0; x < dw; ++x) { | |
732 int px = SkClampPos(prev_x); | |
733 int nx = SkFastMin32(next_x, sw); | |
734 | |
735 int ipx = SkClampPos(prev_x + 1); | |
736 int inx = SkClampMax(next_x - 1, sw); | |
737 | |
738 uint32_t outerSum = sum[px+py] + sum[nx+ny] | |
739 - sum[nx+py] - sum[px+ny]; | |
740 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny] | |
741 - sum[inx+ipy] - sum[ipx+iny]; | |
742 *dst++ = SkToU8((outerSum * outerScale | |
743 + innerSum * innerScale) >> 24); | |
744 | |
745 prev_x += 1; | |
746 next_x += 1; | |
747 } | |
748 * The sections are: | |
749 * left-hand section, where prev_x is clamped to 0 | |
750 * center section, where neither prev_x nor next_x is clamped | |
751 * right-hand section, where next_x is clamped to sw | |
752 * On some operating systems, the center section is unrolled for additional | |
753 * speedup. | |
754 */ | |
755 static void apply_kernel_interp(uint8_t dst[], int rx, int ry, | |
756 const uint32_t sum[], int sw, int sh, U8CPU outerWeight) { | |
757 SkASSERT(rx > 0 && ry > 0); | |
758 SkASSERT(outerWeight <= 255); | |
759 | |
760 if (2*rx > sw) { | |
761 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outerWeight); | |
762 return; | |
763 } | |
764 | |
765 int innerWeight = 255 - outerWeight; | |
766 | |
767 // round these guys up if they're bigger than 127 | |
768 outerWeight += outerWeight >> 7; | |
769 innerWeight += innerWeight >> 7; | |
770 | |
771 uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1)); | |
772 uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1)); | |
773 | |
774 int sumStride = sw + 1; | |
775 | |
776 int dw = sw + 2*rx; | |
777 int dh = sh + 2*ry; | |
778 | |
779 int prev_y = -2*ry; | |
780 int next_y = 1; | |
781 | |
782 SkASSERT(2*rx <= dw - 2*rx); | |
783 | |
784 for (int y = 0; y < dh; ++y) { | |
785 int py = SkClampPos(prev_y) * sumStride; | |
786 int ny = SkFastMin32(next_y, sh) * sumStride; | |
787 | |
788 int ipy = SkClampPos(prev_y + 1) * sumStride; | |
789 int iny = SkClampMax(next_y - 1, sh) * sumStride; | |
790 | |
791 int prev_x = -2*rx; | |
792 int next_x = 1; | |
793 int x = 0; | |
794 | |
795 for (; x < 2*rx; ++x) { | |
796 SkASSERT(prev_x < 0); | |
797 SkASSERT(next_x <= sw); | |
798 | |
799 int px = 0; | |
800 int nx = next_x; | |
801 | |
802 int ipx = 0; | |
803 int inx = next_x - 1; | |
804 | |
805 uint32_t outerSum = sum[px+py] + sum[nx+ny] | |
806 - sum[nx+py] - sum[px+ny]; | |
807 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny] | |
808 - sum[inx+ipy] - sum[ipx+iny]; | |
809 *dst++ = SkToU8((outerSum * outerScale | |
810 + innerSum * innerScale) >> 24); | |
811 | |
812 prev_x += 1; | |
813 next_x += 1; | |
814 } | |
815 | |
816 int i0 = prev_x + py; | |
817 int i1 = next_x + ny; | |
818 int i2 = next_x + py; | |
819 int i3 = prev_x + ny; | |
820 int i4 = prev_x + 1 + ipy; | |
821 int i5 = next_x - 1 + iny; | |
822 int i6 = next_x - 1 + ipy; | |
823 int i7 = prev_x + 1 + iny; | |
824 | |
825 #if UNROLL_KERNEL_LOOP | |
826 for (; x < dw - 2*rx - 4; x += 4) { | |
827 SkASSERT(prev_x >= 0); | |
828 SkASSERT(next_x <= sw); | |
829 | |
830 uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
831 uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; | |
832 *dst++ = SkToU8((outerSum * outerScale | |
833 + innerSum * innerScale) >> 24); | |
834 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
835 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; | |
836 *dst++ = SkToU8((outerSum * outerScale | |
837 + innerSum * innerScale) >> 24); | |
838 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
839 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; | |
840 *dst++ = SkToU8((outerSum * outerScale | |
841 + innerSum * innerScale) >> 24); | |
842 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
843 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; | |
844 *dst++ = SkToU8((outerSum * outerScale | |
845 + innerSum * innerScale) >> 24); | |
846 | |
847 prev_x += 4; | |
848 next_x += 4; | |
849 } | |
850 #endif | |
851 | |
852 for (; x < dw - 2*rx; ++x) { | |
853 SkASSERT(prev_x >= 0); | |
854 SkASSERT(next_x <= sw); | |
855 | |
856 uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++]; | |
857 uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++]; | |
858 *dst++ = SkToU8((outerSum * outerScale | |
859 + innerSum * innerScale) >> 24); | |
860 | |
861 prev_x += 1; | |
862 next_x += 1; | |
863 } | |
864 | |
865 for (; x < dw; ++x) { | |
866 SkASSERT(prev_x >= 0); | |
867 SkASSERT(next_x > sw); | |
868 | |
869 int px = prev_x; | |
870 int nx = sw; | |
871 | |
872 int ipx = prev_x + 1; | |
873 int inx = sw; | |
874 | |
875 uint32_t outerSum = sum[px+py] + sum[nx+ny] | |
876 - sum[nx+py] - sum[px+ny]; | |
877 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny] | |
878 - sum[inx+ipy] - sum[ipx+iny]; | |
879 *dst++ = SkToU8((outerSum * outerScale | |
880 + innerSum * innerScale) >> 24); | |
881 | |
882 prev_x += 1; | |
883 next_x += 1; | |
884 } | |
885 | |
886 prev_y += 1; | |
887 next_y += 1; | |
888 } | |
889 } | |
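The interpolated variants above blend a box of radius rx with one of radius rx-1, weighting the larger box by outerWeight/255 and the smaller by the remainder, all in fixed point. A sketch of just that weighting step, assuming the two box sums have already been read from the sum buffer; interp_average() and its parameters are illustrative, not names used in this file:

    #include <cassert>
    #include <cstdint>

    static uint8_t interp_average(uint32_t outerSum,   // box sum for radius rx, ry
                                  uint32_t innerSum,   // box sum for radius rx-1, ry-1
                                  int rx, int ry, unsigned outerWeight /* 0..255 */) {
        assert(rx > 0 && ry > 0 && outerWeight <= 255);
        unsigned innerWeight = 255 - outerWeight;
        // Weights of 128..255 are bumped by one so the pair effectively sums to
        // 256, matching the "round these guys up" step in the code above.
        outerWeight += outerWeight >> 7;
        innerWeight += innerWeight >> 7;
        uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1) * (2*ry + 1));
        uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1) * (2*ry - 1));
        // weight/256 * sum/area for each box, folded into one multiply per term
        return static_cast<uint8_t>((outerSum * outerScale
                                   + innerSum * innerScale) >> 24);
    }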
890 | |
891 #include "SkColorPriv.h" | 415 #include "SkColorPriv.h" |
892 | 416 |
893 static void merge_src_with_blur(uint8_t dst[], int dstRB, | 417 static void merge_src_with_blur(uint8_t dst[], int dstRB, |
894 const uint8_t src[], int srcRB, | 418 const uint8_t src[], int srcRB, |
895 const uint8_t blur[], int blurRB, | 419 const uint8_t blur[], int blurRB, |
896 int sw, int sh) { | 420 int sw, int sh) { |
897 dstRB -= sw; | 421 dstRB -= sw; |
898 srcRB -= sw; | 422 srcRB -= sw; |
899 blurRB -= sw; | 423 blurRB -= sw; |
900 while (--sh >= 0) { | 424 while (--sh >= 0) { |
(...skipping 47 matching lines...)
948 | 472 |
949 // we use a local function to wrap the class static method to work around | 473 // we use a local function to wrap the class static method to work around |
950 // a bug in gcc98 | 474 // a bug in gcc98 |
951 void SkMask_FreeImage(uint8_t* image); | 475 void SkMask_FreeImage(uint8_t* image); |
952 void SkMask_FreeImage(uint8_t* image) { | 476 void SkMask_FreeImage(uint8_t* image) { |
953 SkMask::FreeImage(image); | 477 SkMask::FreeImage(image); |
954 } | 478 } |
955 | 479 |
956 bool SkBlurMask::Blur(SkMask* dst, const SkMask& src, | 480 bool SkBlurMask::Blur(SkMask* dst, const SkMask& src, |
957 SkScalar radius, Style style, Quality quality, | 481 SkScalar radius, Style style, Quality quality, |
958 SkIPoint* margin, bool separable) | 482 SkIPoint* margin) |
959 { | 483 { |
960 | 484 |
961 if (src.fFormat != SkMask::kA8_Format) { | 485 if (src.fFormat != SkMask::kA8_Format) { |
962 return false; | 486 return false; |
963 } | 487 } |
964 | 488 |
965 // Force high quality off for small radii (performance) | 489 // Force high quality off for small radii (performance) |
966 if (radius < SkIntToScalar(3)) { | 490 if (radius < SkIntToScalar(3)) { |
967 quality = kLow_Quality; | 491 quality = kLow_Quality; |
968 } | 492 } |
(...skipping 35 matching lines...)
1004 return false; // too big to allocate, abort | 528 return false; // too big to allocate, abort |
1005 } | 529 } |
1006 | 530 |
1007 int sw = src.fBounds.width(); | 531 int sw = src.fBounds.width(); |
1008 int sh = src.fBounds.height(); | 532 int sh = src.fBounds.height(); |
1009 const uint8_t* sp = src.fImage; | 533 const uint8_t* sp = src.fImage; |
1010 uint8_t* dp = SkMask::AllocImage(dstSize); | 534 uint8_t* dp = SkMask::AllocImage(dstSize); |
1011 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp); | 535 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp); |
1012 | 536 |
1013 // build the blurry destination | 537 // build the blurry destination |
1014 if (separable) { | 538 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); |
1015 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); | 539 uint8_t* tp = tmpBuffer.get(); |
1016 uint8_t* tp = tmpBuffer.get(); | 540 int w = sw, h = sh; |
1017 int w = sw, h = sh; | |
1018 | 541 |
1019 if (outerWeight == 255) { | 542 if (outerWeight == 255) { |
1020 int loRadius, hiRadius; | 543 int loRadius, hiRadius; |
1021 get_adjusted_radii(passRadius, &loRadius, &hiRadius); | 544 get_adjusted_radii(passRadius, &loRadius, &hiRadius); |
1022 if (kHigh_Quality == quality) { | 545 if (kHigh_Quality == quality) { |
1023 // Do three X blurs, with a transpose on the final one. | 546 // Do three X blurs, with a transpose on the final one. |
1024                 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false); | 547             w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false); |
1025                 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false); | 548             w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false); |
1026                 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true); | 549             w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true); |
1027 // Do three Y blurs, with a transpose on the final one. | 550 // Do three Y blurs, with a transpose on the final one. |
1028                 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false); | 551             h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false); |
1029                 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false); | 552             h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false); |
1030                 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true); | 553             h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true); |
1031 } else { | |
1032 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true); | |
1033 h = boxBlur(tp, h, dp, ry, ry, h, w, true); | |
1034 } | |
1035 } else { | 554 } else { |
1036 if (kHigh_Quality == quality) { | 555 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true); |
1037 // Do three X blurs, with a transpose on the final one. | 556 h = boxBlur(tp, h, dp, ry, ry, h, w, true); |
1038                 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight); | |
1039                 w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight); | |
1040                 w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight); | |
1041 // Do three Y blurs, with a transpose on the final one. | |
1042                 h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight); | |
1043                 h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight); | |
1044                 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); | |
1045 } else { | |
1046                 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight); | |
1047                 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); | |
1048 } | |
1049 } | 557 } |
1050 } else { | 558 } else { |
1051 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1; | 559 if (kHigh_Quality == quality) { |
1052 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1; | 560 // Do three X blurs, with a transpose on the final one. |
1053         SkAutoTMalloc<uint32_t> storage(storageW * storageH); | 561             w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight); |
1054         uint32_t* sumBuffer = storage.get(); | 562             w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight); |
1055 | 563             w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight); |
1056 //pass1: sp is source, dp is destination | 564 // Do three Y blurs, with a transpose on the final one. |
1057         build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes); | 565             h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight); |
1058         if (outerWeight == 255) { | 566             h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight); |
1059             apply_kernel(dp, rx, ry, sumBuffer, sw, sh); | 567             h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); |
1060 } else { | 568 } else { |
1061             apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outerWeight); | 569             w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight); |
1062         } | 570             h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); |
1063 | |
1064 if (kHigh_Quality == quality) { | |
1065 //pass2: dp is source, tmpBuffer is destination | |
1066 int tmp_sw = sw + 2 * rx; | |
1067 int tmp_sh = sh + 2 * ry; | |
1068 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); | |
1069 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw); | |
1070 if (outerWeight == 255) | |
1071                 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh); | |
1072 else | |
1073 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer, | |
1074 tmp_sw, tmp_sh, outerWeight); | |
1075 | |
1076 //pass3: tmpBuffer is source, dp is destination | |
1077 tmp_sw += 2 * rx; | |
1078 tmp_sh += 2 * ry; | |
1079             build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw); | |
1080 if (outerWeight == 255) | |
1081 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh); | |
1082 else | |
1083 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh, | |
1084 outerWeight); | |
1085 } | 571 } |
1086 } | 572 } |
1087 | 573 |
1088 dst->fImage = dp; | 574 dst->fImage = dp; |
1089 // if need be, alloc the "real" dst (same size as src) and copy/merge | 575 // if need be, alloc the "real" dst (same size as src) and copy/merge |
1090 // the blur into it (applying the src) | 576 // the blur into it (applying the src) |
1091 if (style == kInner_Style) { | 577 if (style == kInner_Style) { |
1092 // now we allocate the "real" dst, mirror the size of src | 578 // now we allocate the "real" dst, mirror the size of src |
1093 size_t srcSize = src.computeImageSize(); | 579 size_t srcSize = src.computeImageSize(); |
1094 if (0 == srcSize) { | 580 if (0 == srcSize) { |
(...skipping 13 matching lines...)
1108 } | 594 } |
1109 | 595 |
1110 if (style == kInner_Style) { | 596 if (style == kInner_Style) { |
1111 dst->fBounds = src.fBounds; // restore trimmed bounds | 597 dst->fBounds = src.fBounds; // restore trimmed bounds |
1112 dst->fRowBytes = src.fRowBytes; | 598 dst->fRowBytes = src.fRowBytes; |
1113 } | 599 } |
1114 | 600 |
1115 return true; | 601 return true; |
1116 } | 602 } |
1117 | 603 |
1118 bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src, | |
1119 SkScalar radius, Style style, Quality quality, | |
1120 SkIPoint* margin) | |
1121 { | |
1122 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true); | |
1123 } | |
1124 | |
1125 bool SkBlurMask::Blur(SkMask* dst, const SkMask& src, | |
1126 SkScalar radius, Style style, Quality quality, | |
1127 SkIPoint* margin) | |
1128 { | |
1129 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false); | |
1130 } | |
1131 | |
1132 /* Convolving a box with itself three times results in a piecewise | 604 /* Convolving a box with itself three times results in a piecewise |
1133 quadratic function: | 605 quadratic function: |
1134 | 606 |
1135 0 x <= -1.5 | 607 0 x <= -1.5 |
1136 9/8 + 3/2 x + 1/2 x^2 -1.5 < x <= -.5 | 608 9/8 + 3/2 x + 1/2 x^2 -1.5 < x <= -.5 |
1137 3/4 - x^2 -.5 < x <= .5 | 609 3/4 - x^2 -.5 < x <= .5 |
1138 9/8 - 3/2 x + 1/2 x^2 0.5 < x <= 1.5 | 610 9/8 - 3/2 x + 1/2 x^2 0.5 < x <= 1.5 |
1139 0 1.5 < x | 611 0 1.5 < x |
1140 | 612 |
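The same piecewise quadratic in display form; the factored expressions are added for readability, and the label B_2 is not a name used in the code:

    B_2(x) =
    \begin{cases}
    0 & x \le -\tfrac{3}{2} \\
    \tfrac{1}{2}\bigl(x + \tfrac{3}{2}\bigr)^2 = \tfrac{9}{8} + \tfrac{3}{2}x + \tfrac{1}{2}x^2 & -\tfrac{3}{2} < x \le -\tfrac{1}{2} \\
    \tfrac{3}{4} - x^2 & -\tfrac{1}{2} < x \le \tfrac{1}{2} \\
    \tfrac{1}{2}\bigl(\tfrac{3}{2} - x\bigr)^2 = \tfrac{9}{8} - \tfrac{3}{2}x + \tfrac{1}{2}x^2 & \tfrac{1}{2} < x \le \tfrac{3}{2} \\
    0 & \tfrac{3}{2} < x
    \end{cases}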
1141 Mathematica: | 613 Mathematica: |
(...skipping 368 matching lines...)
1510 (void)autoCall.detach(); | 982 (void)autoCall.detach(); |
1511 } | 983 } |
1512 | 984 |
1513 if (style == kInner_Style) { | 985 if (style == kInner_Style) { |
1514 dst->fBounds = src.fBounds; // restore trimmed bounds | 986 dst->fBounds = src.fBounds; // restore trimmed bounds |
1515 dst->fRowBytes = src.fRowBytes; | 987 dst->fRowBytes = src.fRowBytes; |
1516 } | 988 } |
1517 | 989 |
1518 return true; | 990 return true; |
1519 } | 991 } |