| Index: src/effects/SkBlurMask.cpp
|
| ===================================================================
|
| --- src/effects/SkBlurMask.cpp (revision 9493)
|
| +++ src/effects/SkBlurMask.cpp (working copy)
|
| @@ -412,482 +412,6 @@
|
| }
|
| }
|
|
|
| -// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
|
| -// break-even on Mac, and a ~15% slowdown on Linux.
|
| -// Reading a word at a time when building the sum buffer seems to give
|
| -// us no appreciable speedup on Windows or Mac, and a 2% slowdown on Linux.
|
| -#if defined(SK_BUILD_FOR_WIN32)
|
| -#define UNROLL_KERNEL_LOOP 1
|
| -#endif
|
| -
|
| -/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
|
| - src values at their position, plus all values above and to the left.
|
| - When we sample into this buffer, we need an initial row and column of 0s,
|
| - so we have an index correspondence as follows:
|
| -
|
| - src[i, j] == sum[i+1, j+1]
|
| - sum[0, j] == sum[i, 0] == 0
|
| -
|
| - We assume that the sum buffer's stride == its width
|
| - */
|
| -static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
|
| - const uint8_t src[], int srcRB) {
|
| - int sumW = srcW + 1;
|
| -
|
| - SkASSERT(srcRB >= srcW);
|
| - // mod srcRB so we can apply it after each row
|
| - srcRB -= srcW;
|
| -
|
| - int x, y;
|
| -
|
| - // zero out the top row and column
|
| - memset(sum, 0, sumW * sizeof(sum[0]));
|
| - sum += sumW;
|
| -
|
| - // special case first row
|
| - uint32_t X = 0;
|
| - *sum++ = 0; // initialize the first column to 0
|
| - for (x = srcW - 1; x >= 0; --x) {
|
| - X = *src++ + X;
|
| - *sum++ = X;
|
| - }
|
| - src += srcRB;
|
| -
|
| - // now do the rest of the rows
|
| - for (y = srcH - 1; y > 0; --y) {
|
| - uint32_t L = 0;
|
| - uint32_t C = 0;
|
| - *sum++ = 0; // initialize the first column to 0
|
| -
|
| - for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
|
| - uint32_t T = sum[-sumW];
|
| - X = *src++ + L + T - C;
|
| - *sum++ = X;
|
| - L = X;
|
| - C = T;
|
| - }
|
| -
|
| - for (; x >= 4; x-=4) {
|
| - uint32_t T = sum[-sumW];
|
| - X = *src++ + L + T - C;
|
| - *sum++ = X;
|
| - L = X;
|
| - C = T;
|
| - T = sum[-sumW];
|
| - X = *src++ + L + T - C;
|
| - *sum++ = X;
|
| - L = X;
|
| - C = T;
|
| - T = sum[-sumW];
|
| - X = *src++ + L + T - C;
|
| - *sum++ = X;
|
| - L = X;
|
| - C = T;
|
| - T = sum[-sumW];
|
| - X = *src++ + L + T - C;
|
| - *sum++ = X;
|
| - L = X;
|
| - C = T;
|
| - }
|
| -
|
| - for (; x >= 0; --x) {
|
| - uint32_t T = sum[-sumW];
|
| - X = *src++ + L + T - C;
|
| - *sum++ = X;
|
| - L = X;
|
| - C = T;
|
| - }
|
| - src += srcRB;
|
| - }
|
| -}
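|
| For reference, the build_sum_buffer() deleted above computes a summed-area
| table: each entry holds the total of every source value above and to the
| left of it, which is what lets the kernels below sample any box in O(1).
| A minimal sketch of the same recurrence, without the alignment and
| unrolling tricks (hypothetical helper, not part of Skia; assumes <stdint.h>
| and <string.h>):
|
| static void build_sum_buffer_simple(uint32_t sum[], int srcW, int srcH,
|                                     const uint8_t src[], int srcRB) {
|     int sumW = srcW + 1;
|     memset(sum, 0, sumW * sizeof(sum[0]));    // zero the top border row
|     for (int y = 1; y <= srcH; ++y) {
|         sum[y * sumW] = 0;                     // zero the left border column
|         for (int x = 1; x <= srcW; ++x) {
|             // sum[y][x] = src + left + top - top-left (inclusion-exclusion)
|             sum[y * sumW + x] = src[(y - 1) * srcRB + (x - 1)]
|                               + sum[y * sumW + (x - 1)]
|                               + sum[(y - 1) * sumW + x]
|                               - sum[(y - 1) * sumW + (x - 1)];
|         }
|     }
| }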
|
| -
|
| -/**
|
| - * This is the path apply_kernel() takes when the kernel
|
| - * is wider than the source image.
|
| - */
|
| -static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
|
| - int sw, int sh) {
|
| - SkASSERT(2*rx > sw);
|
| -
|
| - uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
|
| -
|
| - int sumStride = sw + 1;
|
| -
|
| - int dw = sw + 2*rx;
|
| - int dh = sh + 2*ry;
|
| -
|
| - int prev_y = -2*ry;
|
| - int next_y = 1;
|
| -
|
| - for (int y = 0; y < dh; ++y) {
|
| - int py = SkClampPos(prev_y) * sumStride;
|
| - int ny = SkFastMin32(next_y, sh) * sumStride;
|
| -
|
| - int prev_x = -2*rx;
|
| - int next_x = 1;
|
| -
|
| - for (int x = 0; x < dw; ++x) {
|
| - int px = SkClampPos(prev_x);
|
| - int nx = SkFastMin32(next_x, sw);
|
| -
|
| - // TODO: should we be adding 1/2 (1 << 23) to round to the
|
| - // nearest integer here?
|
| - uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
|
| - *dst++ = SkToU8(tmp * scale >> 24);
|
| -
|
| - prev_x += 1;
|
| - next_x += 1;
|
| - }
|
| -
|
| - prev_y += 1;
|
| - next_y += 1;
|
| - }
|
| -}
|
| -/**
|
| - * sw and sh are the width and height of the src. Since the sum buffer
|
| - * matches that, but has an extra row and col at the beginning (with zeros),
|
| - * we can just use sw and sh as our "max" values for pinning coordinates
|
| - * when sampling into sum[][]
|
| - *
|
| - * The inner loop is conceptually simple; we break it into several sections
|
| - * to improve performance. Here's the original version:
|
| - for (int x = 0; x < dw; ++x) {
|
| - int px = SkClampPos(prev_x);
|
| - int nx = SkFastMin32(next_x, sw);
|
| -
|
| - uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
|
| - *dst++ = SkToU8(tmp * scale >> 24);
|
| -
|
| - prev_x += 1;
|
| - next_x += 1;
|
| - }
|
| - * The sections are:
|
| - * left-hand section, where prev_x is clamped to 0
|
| - * center section, where neither prev_x nor next_x is clamped
|
| - * right-hand section, where next_x is clamped to sw
|
| - * On some operating systems, the center section is unrolled for additional
|
| - * speedup.
|
| -*/
|
| -static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
|
| - int sw, int sh) {
|
| - if (2*rx > sw) {
|
| - kernel_clamped(dst, rx, ry, sum, sw, sh);
|
| - return;
|
| - }
|
| -
|
| - uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
|
| -
|
| - int sumStride = sw + 1;
|
| -
|
| - int dw = sw + 2*rx;
|
| - int dh = sh + 2*ry;
|
| -
|
| - int prev_y = -2*ry;
|
| - int next_y = 1;
|
| -
|
| - SkASSERT(2*rx <= dw - 2*rx);
|
| -
|
| - for (int y = 0; y < dh; ++y) {
|
| - int py = SkClampPos(prev_y) * sumStride;
|
| - int ny = SkFastMin32(next_y, sh) * sumStride;
|
| -
|
| - int prev_x = -2*rx;
|
| - int next_x = 1;
|
| - int x = 0;
|
| -
|
| - for (; x < 2*rx; ++x) {
|
| - SkASSERT(prev_x <= 0);
|
| - SkASSERT(next_x <= sw);
|
| -
|
| - int px = 0;
|
| - int nx = next_x;
|
| -
|
| - uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
|
| - *dst++ = SkToU8(tmp * scale >> 24);
|
| -
|
| - prev_x += 1;
|
| - next_x += 1;
|
| - }
|
| -
|
| - int i0 = prev_x + py;
|
| - int i1 = next_x + ny;
|
| - int i2 = next_x + py;
|
| - int i3 = prev_x + ny;
|
| -
|
| -#if UNROLL_KERNEL_LOOP
|
| - for (; x < dw - 2*rx - 4; x += 4) {
|
| - SkASSERT(prev_x >= 0);
|
| - SkASSERT(next_x <= sw);
|
| -
|
| - uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
|
| - *dst++ = SkToU8(tmp * scale >> 24);
|
| - tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
|
| - *dst++ = SkToU8(tmp * scale >> 24);
|
| - tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
|
| - *dst++ = SkToU8(tmp * scale >> 24);
|
| - tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
|
| - *dst++ = SkToU8(tmp * scale >> 24);
|
| -
|
| - prev_x += 4;
|
| - next_x += 4;
|
| - }
|
| -#endif
|
| -
|
| - for (; x < dw - 2*rx; ++x) {
|
| - SkASSERT(prev_x >= 0);
|
| - SkASSERT(next_x <= sw);
|
| -
|
| - uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
|
| - *dst++ = SkToU8(tmp * scale >> 24);
|
| -
|
| - prev_x += 1;
|
| - next_x += 1;
|
| - }
|
| -
|
| - for (; x < dw; ++x) {
|
| - SkASSERT(prev_x >= 0);
|
| - SkASSERT(next_x > sw);
|
| -
|
| - int px = prev_x;
|
| - int nx = sw;
|
| -
|
| - uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
|
| - *dst++ = SkToU8(tmp * scale >> 24);
|
| -
|
| - prev_x += 1;
|
| - next_x += 1;
|
| - }
|
| -
|
| - prev_y += 1;
|
| - next_y += 1;
|
| - }
|
| -}
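|
| The four sum[] reads in apply_kernel() are the summed-area identity: the
| total of any axis-aligned box falls out of its four corner samples, and the
| divide by the kernel area is folded into an 8.24 fixed-point reciprocal so
| each pixel costs one multiply and one shift. A sketch of one output pixel
| under the sum-buffer layout described above (hypothetical helper; the
| removed code pre-multiplies py/ny by the stride and hoists scale out of
| the loops):
|
| static uint8_t box_average(const uint32_t sum[], int sumStride,
|                            int px, int py, int nx, int ny, int rx, int ry) {
|     // box total over corners (px, py)..(nx, ny) via inclusion-exclusion
|     uint32_t total = sum[py * sumStride + px] + sum[ny * sumStride + nx]
|                    - sum[py * sumStride + nx] - sum[ny * sumStride + px];
|     // divide by the (2*rx + 1) x (2*ry + 1) kernel area in 8.24 fixed point
|     uint32_t scale = (1 << 24) / ((2*rx + 1) * (2*ry + 1));
|     return (uint8_t)(total * scale >> 24);
| }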
|
| -
|
| -/**
|
| - * This is the path apply_kernel_interp() takes when the kernel
|
| - * is wider than the source image.
|
| - */
|
| -static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
|
| - const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
|
| - SkASSERT(2*rx > sw);
|
| -
|
| - int innerWeight = 255 - outerWeight;
|
| -
|
| - // round these guys up if they're bigger than 127
|
| - outerWeight += outerWeight >> 7;
|
| - innerWeight += innerWeight >> 7;
|
| -
|
| - uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
|
| - uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));
|
| -
|
| - int sumStride = sw + 1;
|
| -
|
| - int dw = sw + 2*rx;
|
| - int dh = sh + 2*ry;
|
| -
|
| - int prev_y = -2*ry;
|
| - int next_y = 1;
|
| -
|
| - for (int y = 0; y < dh; ++y) {
|
| - int py = SkClampPos(prev_y) * sumStride;
|
| - int ny = SkFastMin32(next_y, sh) * sumStride;
|
| -
|
| - int ipy = SkClampPos(prev_y + 1) * sumStride;
|
| - int iny = SkClampMax(next_y - 1, sh) * sumStride;
|
| -
|
| - int prev_x = -2*rx;
|
| - int next_x = 1;
|
| -
|
| - for (int x = 0; x < dw; ++x) {
|
| - int px = SkClampPos(prev_x);
|
| - int nx = SkFastMin32(next_x, sw);
|
| -
|
| - int ipx = SkClampPos(prev_x + 1);
|
| - int inx = SkClampMax(next_x - 1, sw);
|
| -
|
| - uint32_t outerSum = sum[px+py] + sum[nx+ny]
|
| - - sum[nx+py] - sum[px+ny];
|
| - uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
|
| - - sum[inx+ipy] - sum[ipx+iny];
|
| - *dst++ = SkToU8((outerSum * outerScale
|
| - + innerSum * innerScale) >> 24);
|
| -
|
| - prev_x += 1;
|
| - next_x += 1;
|
| - }
|
| - prev_y += 1;
|
| - next_y += 1;
|
| - }
|
| -}
|
| -
|
| -/**
|
| - * sw and sh are the width and height of the src. Since the sum buffer
|
| - * matches that, but has an extra row and col at the beginning (with zeros),
|
| - * we can just use sw and sh as our "max" values for pinning coordinates
|
| - * when sampling into sum[][]
|
| - *
|
| - * The inner loop is conceptually simple; we break it into several variants
|
| - * to improve performance. Here's the original version:
|
| - for (int x = 0; x < dw; ++x) {
|
| - int px = SkClampPos(prev_x);
|
| - int nx = SkFastMin32(next_x, sw);
|
| -
|
| - int ipx = SkClampPos(prev_x + 1);
|
| - int inx = SkClampMax(next_x - 1, sw);
|
| -
|
| - uint32_t outerSum = sum[px+py] + sum[nx+ny]
|
| - - sum[nx+py] - sum[px+ny];
|
| - uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
|
| - - sum[inx+ipy] - sum[ipx+iny];
|
| - *dst++ = SkToU8((outerSum * outerScale
|
| - + innerSum * innerScale) >> 24);
|
| -
|
| - prev_x += 1;
|
| - next_x += 1;
|
| - }
|
| - * The sections are:
|
| - * left-hand section, where prev_x is clamped to 0
|
| - * center section, where neither prev_x nor next_x is clamped
|
| - * right-hand section, where next_x is clamped to sw
|
| - * On some operating systems, the center section is unrolled for additional
|
| - * speedup.
|
| -*/
|
| -static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
|
| - const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
|
| - SkASSERT(rx > 0 && ry > 0);
|
| - SkASSERT(outerWeight <= 255);
|
| -
|
| - if (2*rx > sw) {
|
| - kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outerWeight);
|
| - return;
|
| - }
|
| -
|
| - int innerWeight = 255 - outerWeight;
|
| -
|
| - // round these guys up if they're bigger than 127
|
| - outerWeight += outerWeight >> 7;
|
| - innerWeight += innerWeight >> 7;
|
| -
|
| - uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
|
| - uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));
|
| -
|
| - int sumStride = sw + 1;
|
| -
|
| - int dw = sw + 2*rx;
|
| - int dh = sh + 2*ry;
|
| -
|
| - int prev_y = -2*ry;
|
| - int next_y = 1;
|
| -
|
| - SkASSERT(2*rx <= dw - 2*rx);
|
| -
|
| - for (int y = 0; y < dh; ++y) {
|
| - int py = SkClampPos(prev_y) * sumStride;
|
| - int ny = SkFastMin32(next_y, sh) * sumStride;
|
| -
|
| - int ipy = SkClampPos(prev_y + 1) * sumStride;
|
| - int iny = SkClampMax(next_y - 1, sh) * sumStride;
|
| -
|
| - int prev_x = -2*rx;
|
| - int next_x = 1;
|
| - int x = 0;
|
| -
|
| - for (; x < 2*rx; ++x) {
|
| - SkASSERT(prev_x < 0);
|
| - SkASSERT(next_x <= sw);
|
| -
|
| - int px = 0;
|
| - int nx = next_x;
|
| -
|
| - int ipx = 0;
|
| - int inx = next_x - 1;
|
| -
|
| - uint32_t outerSum = sum[px+py] + sum[nx+ny]
|
| - - sum[nx+py] - sum[px+ny];
|
| - uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
|
| - - sum[inx+ipy] - sum[ipx+iny];
|
| - *dst++ = SkToU8((outerSum * outerScale
|
| - + innerSum * innerScale) >> 24);
|
| -
|
| - prev_x += 1;
|
| - next_x += 1;
|
| - }
|
| -
|
| - int i0 = prev_x + py;
|
| - int i1 = next_x + ny;
|
| - int i2 = next_x + py;
|
| - int i3 = prev_x + ny;
|
| - int i4 = prev_x + 1 + ipy;
|
| - int i5 = next_x - 1 + iny;
|
| - int i6 = next_x - 1 + ipy;
|
| - int i7 = prev_x + 1 + iny;
|
| -
|
| -#if UNROLL_KERNEL_LOOP
|
| - for (; x < dw - 2*rx - 4; x += 4) {
|
| - SkASSERT(prev_x >= 0);
|
| - SkASSERT(next_x <= sw);
|
| -
|
| - uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
|
| - uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
|
| - *dst++ = SkToU8((outerSum * outerScale
|
| - + innerSum * innerScale) >> 24);
|
| - outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
|
| - innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
|
| - *dst++ = SkToU8((outerSum * outerScale
|
| - + innerSum * innerScale) >> 24);
|
| - outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
|
| - innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
|
| - *dst++ = SkToU8((outerSum * outerScale
|
| - + innerSum * innerScale) >> 24);
|
| - outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
|
| - innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
|
| - *dst++ = SkToU8((outerSum * outerScale
|
| - + innerSum * innerScale) >> 24);
|
| -
|
| - prev_x += 4;
|
| - next_x += 4;
|
| - }
|
| -#endif
|
| -
|
| - for (; x < dw - 2*rx; ++x) {
|
| - SkASSERT(prev_x >= 0);
|
| - SkASSERT(next_x <= sw);
|
| -
|
| - uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
|
| - uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
|
| - *dst++ = SkToU8((outerSum * outerScale
|
| - + innerSum * innerScale) >> 24);
|
| -
|
| - prev_x += 1;
|
| - next_x += 1;
|
| - }
|
| -
|
| - for (; x < dw; ++x) {
|
| - SkASSERT(prev_x >= 0);
|
| - SkASSERT(next_x > sw);
|
| -
|
| - int px = prev_x;
|
| - int nx = sw;
|
| -
|
| - int ipx = prev_x + 1;
|
| - int inx = sw;
|
| -
|
| - uint32_t outerSum = sum[px+py] + sum[nx+ny]
|
| - - sum[nx+py] - sum[px+ny];
|
| - uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
|
| - - sum[inx+ipy] - sum[ipx+iny];
|
| - *dst++ = SkToU8((outerSum * outerScale
|
| - + innerSum * innerScale) >> 24);
|
| -
|
| - prev_x += 1;
|
| - next_x += 1;
|
| - }
|
| -
|
| - prev_y += 1;
|
| - next_y += 1;
|
| - }
|
| -}
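|
| The interp variants approximate fractional radii: each pixel is a weighted
| mix of the (2*rx + 1)-wide outer box and the (2*rx - 1)-wide inner box, with
| the 8-bit weights folded into the same fixed-point scales (8 bits of weight
| plus 16 bits from the shift, consumed by the final >> 24). Since the two
| weights sum to 255 and exactly one of them is >= 128, the ">> 7" bump makes
| the pair total 256, so no per-pixel divide is needed. A sketch of the blend,
| assuming the outer and inner box totals are already sampled (hypothetical
| helper):
|
| static uint8_t blend_box_sums(uint32_t outerSum, uint32_t innerSum,
|                               int rx, int ry, unsigned outerWeight) {
|     unsigned innerWeight = 255 - outerWeight;  // caller keeps weight <= 255
|     outerWeight += outerWeight >> 7;           // bump so the weights sum to 256
|     innerWeight += innerWeight >> 7;
|     uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1) * (2*ry + 1));
|     uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1) * (2*ry - 1));
|     return (uint8_t)((outerSum * outerScale + innerSum * innerScale) >> 24);
| }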
|
| -
|
| #include "SkColorPriv.h"
|
|
|
| static void merge_src_with_blur(uint8_t dst[], int dstRB,
|
| @@ -955,7 +479,7 @@
|
|
|
| bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
|
| SkScalar radius, Style style, Quality quality,
|
| - SkIPoint* margin, bool separable)
|
| + SkIPoint* margin)
|
| {
|
|
|
| if (src.fFormat != SkMask::kA8_Format) {
|
| @@ -1011,78 +535,40 @@
|
| SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
|
|
|
| // build the blurry destination
|
| - if (separable) {
|
| - SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
|
| - uint8_t* tp = tmpBuffer.get();
|
| - int w = sw, h = sh;
|
| + SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
|
| + uint8_t* tp = tmpBuffer.get();
|
| + int w = sw, h = sh;
|
|
|
| - if (outerWeight == 255) {
|
| - int loRadius, hiRadius;
|
| - get_adjusted_radii(passRadius, &loRadius, &hiRadius);
|
| - if (kHigh_Quality == quality) {
|
| - // Do three X blurs, with a transpose on the final one.
|
| - w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
|
| - w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
|
| - w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
|
| - // Do three Y blurs, with a transpose on the final one.
|
| - h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
|
| - h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
|
| - h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
|
| - } else {
|
| - w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
|
| - h = boxBlur(tp, h, dp, ry, ry, h, w, true);
|
| - }
|
| + if (outerWeight == 255) {
|
| + int loRadius, hiRadius;
|
| + get_adjusted_radii(passRadius, &loRadius, &hiRadius);
|
| + if (kHigh_Quality == quality) {
|
| + // Do three X blurs, with a transpose on the final one.
|
| + w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
|
| + w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
|
| + w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
|
| + // Do three Y blurs, with a transpose on the final one.
|
| + h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
|
| + h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
|
| + h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
|
| } else {
|
| - if (kHigh_Quality == quality) {
|
| - // Do three X blurs, with a transpose on the final one.
|
| - w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
|
| - w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight);
|
| - w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight);
|
| - // Do three Y blurs, with a transpose on the final one.
|
| - h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight);
|
| - h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight);
|
| - h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
|
| - } else {
|
| - w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
|
| - h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
|
| - }
|
| + w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
|
| + h = boxBlur(tp, h, dp, ry, ry, h, w, true);
|
| }
|
| } else {
|
| - const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
|
| - const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
|
| - SkAutoTMalloc<uint32_t> storage(storageW * storageH);
|
| - uint32_t* sumBuffer = storage.get();
|
| -
|
| - //pass1: sp is source, dp is destination
|
| - build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
|
| - if (outerWeight == 255) {
|
| - apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
|
| + if (kHigh_Quality == quality) {
|
| + // Do three X blurs, with a transpose on the final one.
|
| + w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
|
| + w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight);
|
| + w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight);
|
| + // Do three Y blurs, with a transpose on the final one.
|
| + h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight);
|
| + h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight);
|
| + h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
|
| } else {
|
| - apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outerWeight);
|
| + w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
|
| + h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
|
| }
|
| -
|
| - if (kHigh_Quality == quality) {
|
| - //pass2: dp is source, tmpBuffer is destination
|
| - int tmp_sw = sw + 2 * rx;
|
| - int tmp_sh = sh + 2 * ry;
|
| - SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
|
| - build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
|
| - if (outerWeight == 255)
|
| - apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
|
| - else
|
| - apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
|
| - tmp_sw, tmp_sh, outerWeight);
|
| -
|
| - //pass3: tmpBuffer is source, dp is destination
|
| - tmp_sw += 2 * rx;
|
| - tmp_sh += 2 * ry;
|
| - build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
|
| - if (outerWeight == 255)
|
| - apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
|
| - else
|
| - apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
|
| - outerWeight);
|
| - }
|
| }
|
|
|
| dst->fImage = dp;
|
| @@ -1115,20 +601,6 @@
|
| return true;
|
| }
|
|
|
| -bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
|
| - SkScalar radius, Style style, Quality quality,
|
| - SkIPoint* margin)
|
| -{
|
| - return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
|
| -}
|
| -
|
| -bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
|
| - SkScalar radius, Style style, Quality quality,
|
| - SkIPoint* margin)
|
| -{
|
| - return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
|
| -}
|
| -
|
| /* Convolving a box with itself three times results in a piecewise
|
| quadratic function:
|
|
|
|
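|
| That trailing comment is the rationale for the path this patch keeps: a box
| filter convolved with itself three times is piecewise quadratic and already
| close to a Gaussian, so the high-quality path runs three O(n) box passes per
| axis instead of the deleted summed-area kernels. A sketch of one 1-D box
| pass built on a running window sum, the trick that boxBlur() generalizes
| (hypothetical helper with edge clamping; the real boxBlur() also grows the
| output by the radius and can transpose as it writes):
|
| static void box_blur_1d(const uint8_t src[], uint8_t dst[], int n, int r) {
|     // seed the running sum with the clamped window centered on index 0
|     uint32_t window = 0;
|     for (int i = -r; i <= r; ++i) {
|         window += src[i < 0 ? 0 : (i >= n ? n - 1 : i)];
|     }
|     const uint32_t scale = (1 << 24) / (2*r + 1);  // 8.24 reciprocal of width
|     for (int i = 0; i < n; ++i) {
|         dst[i] = (uint8_t)(window * scale >> 24);
|         int add = i + r + 1;               // slide the window right by one
|         int sub = i - r;
|         window += src[add >= n ? n - 1 : add];
|         window -= src[sub < 0 ? 0 : sub];
|     }
| }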
|