src/opts/SkBitmapProcState_opts_arm.cpp - Issue 20749003: add scalar stubs for neon functions to give arm guys a place to work

Side by Side Diff: src/opts/SkBitmapProcState_opts_arm.cpp

Issue 20749003: add scalar stubs for neon functions to give arm guys a place to work (Closed) Base URL: https://skia.googlecode.com/svn/trunk

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright 2009 The Android Open Source Project	2 * Copyright 2009 The Android Open Source Project

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8	8

9 #include "SkBitmapProcState.h"	9 #include "SkBitmapProcState.h"

10 #include "SkColorPriv.h"	10 #include "SkColorPriv.h"

11 #include "SkPaint.h"	11 #include "SkPaint.h"

12 #include "SkTypes.h"	12 #include "SkTypes.h"

13 #include "SkUtils.h"	13 #include "SkUtils.h"

	14 #include "SkUtilsArm.h"

	15

	16 #include "SkConvolver.h"

14	17

15 #if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)	18 #if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)

16 void SI8_D16_nofilter_DX_arm(	19 void SI8_D16_nofilter_DX_arm(

17 const SkBitmapProcState& s,	20 const SkBitmapProcState& s,

18 const uint32_t* SK_RESTRICT xy,	21 const uint32_t* SK_RESTRICT xy,

19 int count,	22 int count,

20 uint16_t* SK_RESTRICT colors) SK_ATTRIBUTE_OPTIMIZE_O1;	23 uint16_t* SK_RESTRICT colors) SK_ATTRIBUTE_OPTIMIZE_O1;

21	24

22 void SI8_D16_nofilter_DX_arm(const SkBitmapProcState& s,	25 void SI8_D16_nofilter_DX_arm(const SkBitmapProcState& s,

23 const uint32_t* SK_RESTRICT xy,	26 const uint32_t* SK_RESTRICT xy,

(...skipping 188 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
212 fShaderProc32 = NULL;	215 fShaderProc32 = NULL;

213 }	216 }

214 }	217 }

215 #endif	218 #endif

216 break;	219 break;

217 default:	220 default:

218 break;	221 break;

219 }	222 }

220 }	223 }

221	224

	225 /////////////////////////////////////

	226

	227 /* FUNCTIONS BELOW ARE SCALAR STUBS INTENDED FOR ARM DEVELOPERS TO REPLACE */

	228

	229 /////////////////////////////////////

	230

	231

	232 static inline unsigned char ClampTo8(int a) {

	233 if (static_cast<unsigned>(a) < 256) {

	234 return a; // Avoid the extra check in the common case.

	235 }

	236 if (a < 0) {

	237 return 0;

	238 }

	239 return 255;

	240 }

	241

	242 // Convolves horizontally along a single row. The row data is given in

	243 // |srcData| and continues for the numValues() of the filter.

	244 void convolveHorizontally_arm(const unsigned char* srcData,

	245 const SkConvolutionFilter1D& filter,

	246 unsigned char* outRow,

	247 bool hasAlpha) {

	248 // Loop over each pixel on this row in the output image.

	249 int numValues = filter.numValues();

	250 for (int outX = 0; outX < numValues; outX++) {

	251 // Get the filter that determines the current output pixel.

	252 int filterOffset, filterLength;

	253 const SkConvolutionFilter1D::ConvolutionFixed* filterValues =

	254 filter.FilterForValue(outX, &filterOffset, &filterLength);

	255

	256 // Compute the first pixel in this row that the filter affects. It will

	257 // touch |filterLength| pixels (4 bytes each) after this.

	258 const unsigned char* rowToFilter = &srcData[filterOffset * 4];

	259

	260 // Apply the filter to the row to get the destination pixel in |accum|.

	261 int accum[4] = {0};

	262 for (int filterX = 0; filterX < filterLength; filterX++) {

	263 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];

	264 accum[0] += curFilter * rowToFilter[filterX * 4 + 0];

	265 accum[1] += curFilter * rowToFilter[filterX * 4 + 1];

	266 accum[2] += curFilter * rowToFilter[filterX * 4 + 2];

	267 if (hasAlpha) {

	268 accum[3] += curFilter * rowToFilter[filterX * 4 + 3];

	269 }

	270 }

	271

	272 // Bring this value back in range. All of the filter scaling factors

	273 // are in fixed point with kShiftBits bits of fractional part.

	274 accum[0] >>= SkConvolutionFilter1D::kShiftBits;

	275 accum[1] >>= SkConvolutionFilter1D::kShiftBits;

	276 accum[2] >>= SkConvolutionFilter1D::kShiftBits;

	277 if (hasAlpha) {

	278 accum[3] >>= SkConvolutionFilter1D::kShiftBits;

	279 }

	280

	281 // Store the new pixel.

	282 outRow[outX * 4 + 0] = ClampTo8(accum[0]);

	283 outRow[outX * 4 + 1] = ClampTo8(accum[1]);

	284 outRow[outX * 4 + 2] = ClampTo8(accum[2]);

	285 if (hasAlpha) {

	286 outRow[outX * 4 + 3] = ClampTo8(accum[3]);

	287 }

	288 }

	289 }

	290

	291 // Does vertical convolution to produce one output row. The filter values and

	292 // length are given in the first two parameters. These are applied to each

	293 // of the rows pointed to in the |sourceDataRows| array, with each row

	294 // being |pixelWidth| wide.

	295 //

	296 // The output must have room for |pixelWidth * 4| bytes.

	297 template<bool hasAlpha>

	298 void convolveVertically_arm(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,

	299 int filterLength,

	300 unsigned char* const* sourceDataRows,

	301 int pixelWidth,

	302 unsigned char* outRow) {

	303 // We go through each column in the output and do a vertical convolution,

	304 // generating one output pixel each time.

	305 for (int outX = 0; outX < pixelWidth; outX++) {

	306 // Compute the number of bytes over in each row that the current column

	307 // we're convolving starts at. The pixel will cover the next 4 bytes.

	308 int byteOffset = outX * 4;

	309

	310 // Apply the filter to one column of pixels.

	311 int accum[4] = {0};

	312 for (int filterY = 0; filterY < filterLength; filterY++) {

	313 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY];

	314 accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0];

	315 accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1];

	316 accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2];

	317 if (hasAlpha) {

	318 accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3];

	319 }

	320 }

	321

	322 // Bring this value back in range. All of the filter scaling factors

	323 // are in fixed point with kShiftBits bits of precision.

	324 accum[0] >>= SkConvolutionFilter1D::kShiftBits;

	325 accum[1] >>= SkConvolutionFilter1D::kShiftBits;

	326 accum[2] >>= SkConvolutionFilter1D::kShiftBits;

	327 if (hasAlpha) {

	328 accum[3] >>= SkConvolutionFilter1D::kShiftBits;

	329 }

	330

	331 // Store the new pixel.

	332 outRow[byteOffset + 0] = ClampTo8(accum[0]);

	333 outRow[byteOffset + 1] = ClampTo8(accum[1]);

	334 outRow[byteOffset + 2] = ClampTo8(accum[2]);

	335 if (hasAlpha) {

	336 unsigned char alpha = ClampTo8(accum[3]);

	337

	338 // Make sure the alpha channel doesn't come out smaller than any of the

	339 // color channels. We use premultiplied alpha channels, so this should

	340 // never happen, but rounding errors will cause this from time to time.

	341 // These "impossible" colors will cause overflows (and hence random pixel

	342 // values) when the resulting bitmap is drawn to the screen.

	343 //

	344 // We only need to do this when generating the final output row (here).

	345 int maxColorChannel = SkTMax(outRow[byteOffset + 0],

	346 SkTMax(outRow[byteOffset + 1],

	347 outRow[byteOffset + 2]));

	348 if (alpha < maxColorChannel) {

	349 outRow[byteOffset + 3] = maxColorChannel;

	350 } else {

	351 outRow[byteOffset + 3] = alpha;

	352 }

	353 } else {

	354 // No alpha channel, the image is opaque.

	355 outRow[byteOffset + 3] = 0xff;

	356 }

	357 }

	358 }

	359

	360 void convolveVertically_arm(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,

	361 int filterLength,

	362 unsigned char* const* sourceDataRows,

	363 int pixelWidth,

	364 unsigned char* outRow,

	365 bool sourceHasAlpha) {

	366 if (sourceHasAlpha) {

	367 convolveVertically_arm<true>(filterValues, filterLength,

	368 sourceDataRows, pixelWidth,

	369 outRow);

	370 } else {

	371 convolveVertically_arm<false>(filterValues, filterLength,

	372 sourceDataRows, pixelWidth,

	373 outRow);

	374 }

	375 }

	376

	377 // Convolves horizontally along four rows. The row data is given in

	378 // |src_data| and continues for the num_values() of the filter.

	379 // The algorithm is almost the same as |ConvolveHorizontally_SSE2|. Please

	380 // refer to that function for detailed comments.

	381 void convolve4RowsHorizontally_arm(const unsigned char* src_data[4],

	382 const SkConvolutionFilter1D& filter,

	383 unsigned char* out_row[4]) {

	384 }

	385

	386 ///////////////////////////

	387

	388 /* STOP REWRITING FUNCTIONS HERE, BUT DON'T FORGET TO EDIT THE

	389 PLATFORM CONVOLUTION PROCS BELOW */

	390

	391 ///////////////////////////

	392

	393 void applySIMDPadding_arm(SkConvolutionFilter1D *filter) {

	394 // Padding |paddingCount| of more dummy coefficients after the coefficients

	395 // of last filter to prevent SIMD instructions which load 8 or 16 bytes

	396 // together to access invalid memory areas. We are not trying to align the

	397 // coefficients right now due to the opaqueness of <vector> implementation.

	398 // This has to be done after all |AddFilter| calls.

	399 for (int i = 0; i < 8; ++i) {

	400 filter->addFilterValue(static_cast<SkConvolutionFilter1D::ConvolutionFixed>(0));

	401 }

	402 }

	403

222 void SkBitmapProcState::platformConvolutionProcs() {	404 void SkBitmapProcState::platformConvolutionProcs() {

223 // no specialization for ARM here yet.	405 if (sk_cpu_arm_has_neon()) {

224 }	406 fConvolutionProcs->fExtraHorizontalReads = 3;

	407 fConvolutionProcs->fConvolveVertically = &convolveVertically_arm;

	408

	409 // next line is commented out because the four-row convolution function above is

	410 // just a no-op. Please see the comment above its definition, and the SSE implementation

	411 // in SkBitmapProcState_opts_SSE2.cpp for guidance on its semantics.

	412 // leaving it as NULL will just cause the convolution system to not attempt

	413 // to operate on four rows at once, which is correct but not performance-optimal.

	414

	415 // fConvolutionProcs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_arm;

	416

	417 fConvolutionProcs->fConvolve4RowsHorizontally = NULL;

	418

	419 fConvolutionProcs->fConvolveHorizontally = &convolveHorizontally_arm;

	420 fConvolutionProcs->fApplySIMDPadding = &applySIMDPadding_arm;

	421 }

	422 }

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »