third_party/libwebp/enc/enc.c - Issue 9328013: remove 2 unused files that are from an old version

Side by Side Diff: third_party/libwebp/enc/enc.c

Issue 9328013: remove 2 unused files that are from an old version (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: Created 8 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright 2011 Google Inc.

2 //

3 // This code is licensed under the same terms as WebM:

4 // Software License Agreement: http://www.webmproject.org/license/software/

5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/

6 // -----------------------------------------------------------------------------

7 //

8 // speed-critical functions.

9 //

10 // Author: Skal (pascal.massimino@gmail.com)

11

12 #include <assert.h>

13 #include "vp8enci.h"

14

15 #if defined(__cplusplus) || defined(c_plusplus)

16 extern "C" {

17 #endif

18

19 //------------------------------------------------------------------------------

20 // Compute susceptibility based on DCT-coeff histograms:

21 // the higher, the "easier" the macroblock is to compress.

22

// Saturates 'alpha' to the [0, 255] range.
static int ClipAlpha(int alpha) {
  if (alpha < 0) return 0;
  if (alpha > 255) return 255;
  return alpha;
}

26

27 int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) {

28 int num = 0, den = 0, val = 0;

29 int k;

30 int alpha;

31 // note: changing this loop to avoid the numerous "k + 1" slows things down.

32 for (k = 0; k < MAX_COEFF_THRESH; ++k) {

33 if (histo[k + 1]) {

34 val += histo[k + 1];

35 num += val * (k + 1);

36 den += (k + 1) * (k + 1);

37 }

38 }

39 // we scale the value to a usable [0..255] range

40 alpha = den ? 10 * num / den - 5 : 0;

41 return ClipAlpha(alpha);

42 }

43

44 static int CollectHistogram(const uint8_t* ref, const uint8_t* pred,

45 int start_block, int end_block) {

46 int histo[MAX_COEFF_THRESH + 1] = { 0 };

47 int16_t out[16];

48 int j, k;

49 for (j = start_block; j < end_block; ++j) {

50 VP8FTransform(ref + VP8Scan[j], pred + VP8Scan[j], out);

51

52 // Convert coefficients to bin (within out[]).

53 for (k = 0; k < 16; ++k) {

54 const int v = abs(out[k]) >> 2;

55 out[k] = (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v;

56 }

57

58 // Use bin to update histogram.

59 for (k = 0; k < 16; ++k) {

60 histo[out[k]]++;

61 }

62 }

63

64 return VP8GetAlpha(histo);

65 }

66

67 //------------------------------------------------------------------------------

68 // run-time tables (~4k)

69

// Lookup table: clip1[255 + v] is v clamped to [0,255], for v in [-255,510].
static uint8_t clip1[255 + 510 + 1];

// Set to 1 once clip1[] has been filled, and written _last_ in InitTables().
// The original author declared it 'volatile' to prevent reordering.
// NOTE(review): 'volatile' is not a thread-synchronization primitive in C;
// concurrent first calls all write identical values, but this is not a
// formally race-free initialization -- confirm whether callers serialize it.
static volatile int tables_ok = 0;

// Fills clip1[] on first call; later calls return immediately.
static void InitTables(void) {
  int idx;
  if (tables_ok) return;
  for (idx = 0; idx <= 255 + 510; ++idx) {
    const int v = idx - 255;    // value represented by this table slot
    if (v < 0) {
      clip1[idx] = 0;
    } else if (v > 255) {
      clip1[idx] = 255;
    } else {
      clip1[idx] = v;
    }
  }
  tables_ok = 1;
}

85

// Clamps 'v' to [0, 255]. Values that already fit in 8 bits take the fast
// path (no bits set outside 0xff).
static inline uint8_t clip_8b(int v) {
  if ((v & ~0xff) == 0) return v;
  return (v < 0) ? 0 : 255;
}

89

90 //------------------------------------------------------------------------------

91 // Transforms (Paragraph 14.4)

92

93 #define STORE(x, y, v) \

94 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))

95

96 static const int kC1 = 20091 + (1 << 16);

97 static const int kC2 = 35468;

98 #define MUL(a, b) (((a) * (b)) >> 16)

99

100 static inline void ITransformOne(const uint8_t* ref, const int16_t* in,

101 uint8_t* dst) {

102 int C[4 * 4], *tmp;

103 int i;

104 tmp = C;

105 for (i = 0; i < 4; ++i) { // vertical pass

106 const int a = in[0] + in[8];

107 const int b = in[0] - in[8];

108 const int c = MUL(in[4], kC2) - MUL(in[12], kC1);

109 const int d = MUL(in[4], kC1) + MUL(in[12], kC2);

110 tmp[0] = a + d;

111 tmp[1] = b + c;

112 tmp[2] = b - c;

113 tmp[3] = a - d;

114 tmp += 4;

115 in++;

116 }

117

118 tmp = C;

119 for (i = 0; i < 4; ++i) { // horizontal pass

120 const int dc = tmp[0] + 4;

121 const int a = dc + tmp[8];

122 const int b = dc - tmp[8];

123 const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1);

124 const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2);

125 STORE(0, i, a + d);

126 STORE(1, i, b + c);

127 STORE(2, i, b - c);

128 STORE(3, i, a - d);

129 tmp++;

130 }

131 }

132

// Inverse-transforms one 4x4 block, or two horizontally adjacent ones when
// 'do_two' is non-zero (second block: ref + 4, in + 16, dst + 4).
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
                       int do_two) {
  const int num_blocks = do_two ? 2 : 1;
  int n;
  for (n = 0; n < num_blocks; ++n) {
    ITransformOne(ref + 4 * n, in + 16 * n, dst + 4 * n);
  }
}

140

141 static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {

142 int i;

143 int tmp[16];

144 for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {

145 const int d0 = src[0] - ref[0];

146 const int d1 = src[1] - ref[1];

147 const int d2 = src[2] - ref[2];

148 const int d3 = src[3] - ref[3];

149 const int a0 = (d0 + d3) << 3;

150 const int a1 = (d1 + d2) << 3;

151 const int a2 = (d1 - d2) << 3;

152 const int a3 = (d0 - d3) << 3;

153 tmp[0 + i * 4] = (a0 + a1);

154 tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 14500) >> 12;

155 tmp[2 + i * 4] = (a0 - a1);

156 tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 7500) >> 12;

157 }

158 for (i = 0; i < 4; ++i) {

159 const int a0 = (tmp[0 + i] + tmp[12 + i]);

160 const int a1 = (tmp[4 + i] + tmp[ 8 + i]);

161 const int a2 = (tmp[4 + i] - tmp[ 8 + i]);

162 const int a3 = (tmp[0 + i] - tmp[12 + i]);

163 out[0 + i] = (a0 + a1 + 7) >> 4;

164 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);

165 out[8 + i] = (a0 - a1 + 7) >> 4;

166 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);

167 }

168 }

169

// Inverse Walsh-Hadamard-style transform of the 16 values in 'in'.
// Output value (row r, position p) is written to out[64 * r + 16 * p], so
// 'out' must be addressable up to index 240.
static void ITransformWHT(const int16_t* in, int16_t* out) {
  int tmp[16];
  int col, row;
  for (col = 0; col < 4; ++col) {
    const int sum_outer = in[col + 0] + in[col + 12];
    const int sum_inner = in[col + 4] + in[col +  8];
    const int dif_inner = in[col + 4] - in[col +  8];
    const int dif_outer = in[col + 0] - in[col + 12];
    tmp[col +  0] = sum_outer + sum_inner;
    tmp[col +  4] = dif_outer + dif_inner;
    tmp[col +  8] = sum_outer - sum_inner;
    tmp[col + 12] = dif_outer - dif_inner;
  }
  for (row = 0; row < 4; ++row) {
    const int* const t = tmp + 4 * row;
    int16_t* const dst = out + 64 * row;
    const int dc = t[0] + 3;    // w/ rounder
    const int a0 = dc + t[3];
    const int a1 = t[1] + t[2];
    const int a2 = t[1] - t[2];
    const int a3 = dc - t[3];
    dst[ 0] = (a0 + a1) >> 3;
    dst[16] = (a3 + a2) >> 3;
    dst[32] = (a0 - a1) >> 3;
    dst[48] = (a3 - a2) >> 3;
  }
}

196

// Forward Walsh-Hadamard-style transform. Input value (row r, position p) is
// read from in[64 * r + 16 * p]; the 16 results are written to out[0..15].
// The first-pass scaling uses '* 4' rather than '<< 2': the operands can be
// negative, and left-shifting a negative int is undefined behavior in C.
static void FTransformWHT(const int16_t* in, int16_t* out) {
  int tmp[16];
  int i;
  // First pass: one group of four strided inputs per iteration.
  for (i = 0; i < 4; ++i, in += 64) {
    const int a0 = (in[0 * 16] + in[2 * 16]) * 4;
    const int a1 = (in[1 * 16] + in[3 * 16]) * 4;
    const int a2 = (in[1 * 16] - in[3 * 16]) * 4;
    const int a3 = (in[0 * 16] - in[2 * 16]) * 4;
    tmp[0 + i * 4] = (a0 + a1) + (a0 != 0);
    tmp[1 + i * 4] = a3 + a2;
    tmp[2 + i * 4] = a3 - a2;
    tmp[3 + i * 4] = a0 - a1;
  }
  // Second pass: combine across groups, then round and scale down by 8.
  for (i = 0; i < 4; ++i) {
    const int a0 = (tmp[0 + i] + tmp[8 + i]);
    const int a1 = (tmp[4 + i] + tmp[12+ i]);
    const int a2 = (tmp[4 + i] - tmp[12+ i]);
    const int a3 = (tmp[0 + i] - tmp[8 + i]);
    const int b0 = a0 + a1;
    const int b1 = a3 + a2;
    const int b2 = a3 - a2;
    const int b3 = a0 - a1;
    out[ 0 + i] = (b0 + (b0 > 0) + 3) >> 3;
    out[ 4 + i] = (b1 + (b1 > 0) + 3) >> 3;
    out[ 8 + i] = (b2 + (b2 > 0) + 3) >> 3;
    out[12 + i] = (b3 + (b3 > 0) + 3) >> 3;
  }
}

225

226 #undef MUL

227 #undef STORE

228

229 //------------------------------------------------------------------------------

230 // Intra predictions

231

232 #define OUT(x, y) dst[(x) + (y) * BPS]

233

234 static inline void Fill(uint8_t* dst, int value, int size) {

235 int j;

236 for (j = 0; j < size; ++j) {

237 memset(dst + j * BPS, value, size);

238 }

239 }

240

241 static inline void VerticalPred(uint8_t* dst, const uint8_t* top, int size) {

242 int j;

243 if (top) {

244 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);

245 } else {

246 Fill(dst, 127, size);

247 }

248 }

249

250 static inline void HorizontalPred(uint8_t* dst, const uint8_t* left, int size) {

251 if (left) {

252 int j;

253 for (j = 0; j < size; ++j) {

254 memset(dst + j * BPS, left[j], size);

255 }

256 } else {

257 Fill(dst, 129, size);

258 }

259 }

260

261 static inline void TrueMotion(uint8_t* dst, const uint8_t* left,

262 const uint8_t* top, int size) {

263 int y;

264 if (left) {

265 if (top) {

266 const uint8_t* const clip = clip1 + 255 - left[-1];

267 for (y = 0; y < size; ++y) {

268 const uint8_t* const clip_table = clip + left[y];

269 int x;

270 for (x = 0; x < size; ++x) {

271 dst[x] = clip_table[top[x]];

272 }

273 dst += BPS;

274 }

275 } else {

276 HorizontalPred(dst, left, size);

277 }

278 } else {

279 // true motion without left samples (hence: with default 129 value)

280 // is equivalent to VE prediction where you just copy the top samples.

281 // Note that if top samples are not available, the default value is

282 // then 129, and not 127 as in the VerticalPred case.

283 if (top) {

284 VerticalPred(dst, top, size);

285 } else {

286 Fill(dst, 129, size);

287 }

288 }

289 }

290

// DC prediction: fills the size x size block with the rounded average of the
// available border samples, computed as (sum + round) >> shift. When only
// one border is present its sum is counted twice; with no border at all the
// block is filled with 0x80.
static inline void DCMode(uint8_t* dst, const uint8_t* left,
                          const uint8_t* top,
                          int size, int round, int shift) {
  int sum = 0;
  int j;
  if (!top && !left) {   // no top, no left, nothing.
    Fill(dst, 0x80, size);
    return;
  }
  if (top) {
    for (j = 0; j < size; ++j) sum += top[j];
  }
  if (left) {
    for (j = 0; j < size; ++j) sum += left[j];
  }
  if (!top || !left) {   // single border: stands in for the missing one
    sum += sum;
  }
  Fill(dst, (sum + round) >> shift, size);
}

313

314 //------------------------------------------------------------------------------

315 // Chroma 8x8 prediction (paragraph 12.2)

316

317 static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,

318 const uint8_t* top) {

319 // U block

320 DCMode(C8DC8 + dst, left, top, 8, 8, 4);

321 VerticalPred(C8VE8 + dst, top, 8);

322 HorizontalPred(C8HE8 + dst, left, 8);

323 TrueMotion(C8TM8 + dst, left, top, 8);

324 // V block

325 dst += 8;

326 if (top) top += 8;

327 if (left) left += 16;

328 DCMode(C8DC8 + dst, left, top, 8, 8, 4);

329 VerticalPred(C8VE8 + dst, top, 8);

330 HorizontalPred(C8HE8 + dst, left, 8);

331 TrueMotion(C8TM8 + dst, left, top, 8);

332 }

333

334 //------------------------------------------------------------------------------

335 // luma 16x16 prediction (paragraph 12.3)

336

337 static void Intra16Preds(uint8_t* dst,

338 const uint8_t* left, const uint8_t* top) {

339 DCMode(I16DC16 + dst, left, top, 16, 16, 5);

340 VerticalPred(I16VE16 + dst, top, 16);

341 HorizontalPred(I16HE16 + dst, left, 16);

342 TrueMotion(I16TM16 + dst, left, top, 16);

343 }

344

345 //------------------------------------------------------------------------------

346 // luma 4x4 prediction

347

348 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)

349 #define AVG2(a, b) (((a) + (b) + 1) >> 1)

350

351 static void VE4(uint8_t* dst, const uint8_t* top) { // vertical

352 const uint8_t vals[4] = {

353 AVG3(top[-1], top[0], top[1]),

354 AVG3(top[ 0], top[1], top[2]),

355 AVG3(top[ 1], top[2], top[3]),

356 AVG3(top[ 2], top[3], top[4])

357 };

358 int i;

359 for (i = 0; i < 4; ++i) {

360 memcpy(dst + i * BPS, vals, 4);

361 }

362 }

363

364 static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal

365 const int X = top[-1];

366 const int I = top[-2];

367 const int J = top[-3];

368 const int K = top[-4];

369 const int L = top[-5];

370 (uint32_t)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J);

371 (uint32_t)(dst + 1 * BPS) = 0x01010101U * AVG3(I, J, K);

372 (uint32_t)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L);

373 (uint32_t)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L);

374 }

375

// DC 4x4 prediction: fills the block with the rounded average of the four
// top samples top[0..3] and the four left samples top[-5..-2].
static void DC4(uint8_t* dst, const uint8_t* top) {
  uint32_t sum = 4;   // rounding term for the >> 3 below
  int n;
  for (n = 0; n < 4; ++n) {
    sum += top[n] + top[n - 5];
  }
  Fill(dst, sum >> 3, 4);
}

382

383 static void RD4(uint8_t* dst, const uint8_t* top) {

384 const int X = top[-1];

385 const int I = top[-2];

386 const int J = top[-3];

387 const int K = top[-4];

388 const int L = top[-5];

389 const int A = top[0];

390 const int B = top[1];

391 const int C = top[2];

392 const int D = top[3];

393 OUT(0, 3) = AVG3(J, K, L);

394 OUT(0, 2) = OUT(1, 3) = AVG3(I, J, K);

395 OUT(0, 1) = OUT(1, 2) = OUT(2, 3) = AVG3(X, I, J);

396 OUT(0, 0) = OUT(1, 1) = OUT(2, 2) = OUT(3, 3) = AVG3(A, X, I);

397 OUT(1, 0) = OUT(2, 1) = OUT(3, 2) = AVG3(B, A, X);

398 OUT(2, 0) = OUT(3, 1) = AVG3(C, B, A);

399 OUT(3, 0) = AVG3(D, C, B);

400 }

401

402 static void LD4(uint8_t* dst, const uint8_t* top) {

403 const int A = top[0];

404 const int B = top[1];

405 const int C = top[2];

406 const int D = top[3];

407 const int E = top[4];

408 const int F = top[5];

409 const int G = top[6];

410 const int H = top[7];

411 OUT(0, 0) = AVG3(A, B, C);

412 OUT(1, 0) = OUT(0, 1) = AVG3(B, C, D);

413 OUT(2, 0) = OUT(1, 1) = OUT(0, 2) = AVG3(C, D, E);

414 OUT(3, 0) = OUT(2, 1) = OUT(1, 2) = OUT(0, 3) = AVG3(D, E, F);

415 OUT(3, 1) = OUT(2, 2) = OUT(1, 3) = AVG3(E, F, G);

416 OUT(3, 2) = OUT(2, 3) = AVG3(F, G, H);

417 OUT(3, 3) = AVG3(G, H, H);

418 }

419

420 static void VR4(uint8_t* dst, const uint8_t* top) {

421 const int X = top[-1];

422 const int I = top[-2];

423 const int J = top[-3];

424 const int K = top[-4];

425 const int A = top[0];

426 const int B = top[1];

427 const int C = top[2];

428 const int D = top[3];

429 OUT(0, 0) = OUT(1, 2) = AVG2(X, A);

430 OUT(1, 0) = OUT(2, 2) = AVG2(A, B);

431 OUT(2, 0) = OUT(3, 2) = AVG2(B, C);

432 OUT(3, 0) = AVG2(C, D);

433

434 OUT(0, 3) = AVG3(K, J, I);

435 OUT(0, 2) = AVG3(J, I, X);

436 OUT(0, 1) = OUT(1, 3) = AVG3(I, X, A);

437 OUT(1, 1) = OUT(2, 3) = AVG3(X, A, B);

438 OUT(2, 1) = OUT(3, 3) = AVG3(A, B, C);

439 OUT(3, 1) = AVG3(B, C, D);

440 }

441

442 static void VL4(uint8_t* dst, const uint8_t* top) {

443 const int A = top[0];

444 const int B = top[1];

445 const int C = top[2];

446 const int D = top[3];

447 const int E = top[4];

448 const int F = top[5];

449 const int G = top[6];

450 const int H = top[7];

451 OUT(0, 0) = AVG2(A, B);

452 OUT(1, 0) = OUT(0, 2) = AVG2(B, C);

453 OUT(2, 0) = OUT(1, 2) = AVG2(C, D);

454 OUT(3, 0) = OUT(2, 2) = AVG2(D, E);

455

456 OUT(0, 1) = AVG3(A, B, C);

457 OUT(1, 1) = OUT(0, 3) = AVG3(B, C, D);

458 OUT(2, 1) = OUT(1, 3) = AVG3(C, D, E);

459 OUT(3, 1) = OUT(2, 3) = AVG3(D, E, F);

460 OUT(3, 2) = AVG3(E, F, G);

461 OUT(3, 3) = AVG3(F, G, H);

462 }

463

464 static void HU4(uint8_t* dst, const uint8_t* top) {

465 const int I = top[-2];

466 const int J = top[-3];

467 const int K = top[-4];

468 const int L = top[-5];

469 OUT(0, 0) = AVG2(I, J);

470 OUT(2, 0) = OUT(0, 1) = AVG2(J, K);

471 OUT(2, 1) = OUT(0, 2) = AVG2(K, L);

472 OUT(1, 0) = AVG3(I, J, K);

473 OUT(3, 0) = OUT(1, 1) = AVG3(J, K, L);

474 OUT(3, 1) = OUT(1, 2) = AVG3(K, L, L);

475 OUT(3, 2) = OUT(2, 2) =

476 OUT(0, 3) = OUT(1, 3) = OUT(2, 3) = OUT(3, 3) = L;

477 }

478

479 static void HD4(uint8_t* dst, const uint8_t* top) {

480 const int X = top[-1];

481 const int I = top[-2];

482 const int J = top[-3];

483 const int K = top[-4];

484 const int L = top[-5];

485 const int A = top[0];

486 const int B = top[1];

487 const int C = top[2];

488

489 OUT(0, 0) = OUT(2, 1) = AVG2(I, X);

490 OUT(0, 1) = OUT(2, 2) = AVG2(J, I);

491 OUT(0, 2) = OUT(2, 3) = AVG2(K, J);

492 OUT(0, 3) = AVG2(L, K);

493

494 OUT(3, 0) = AVG3(A, B, C);

495 OUT(2, 0) = AVG3(X, A, B);

496 OUT(1, 0) = OUT(3, 1) = AVG3(I, X, A);

497 OUT(1, 1) = OUT(3, 2) = AVG3(J, I, X);

498 OUT(1, 2) = OUT(3, 3) = AVG3(K, J, I);

499 OUT(1, 3) = AVG3(L, K, J);

500 }

501

502 static void TM4(uint8_t* dst, const uint8_t* top) {

503 int x, y;

504 const uint8_t* const clip = clip1 + 255 - top[-1];

505 for (y = 0; y < 4; ++y) {

506 const uint8_t* const clip_table = clip + top[-2 - y];

507 for (x = 0; x < 4; ++x) {

508 dst[x] = clip_table[top[x]];

509 }

510 dst += BPS;

511 }

512 }

513

514 #undef AVG3

515 #undef AVG2

516

517 // Left samples are top[-5 .. -2], top_left is top[-1], top are

518 // located at top[0..3], and top right is top[4..7]

519 static void Intra4Preds(uint8_t* dst, const uint8_t* top) {

520 DC4(I4DC4 + dst, top);

521 TM4(I4TM4 + dst, top);

522 VE4(I4VE4 + dst, top);

523 HE4(I4HE4 + dst, top);

524 RD4(I4RD4 + dst, top);

525 VR4(I4VR4 + dst, top);

526 LD4(I4LD4 + dst, top);

527 VL4(I4VL4 + dst, top);

528 HD4(I4HD4 + dst, top);

529 HU4(I4HU4 + dst, top);

530 }

531

532 //------------------------------------------------------------------------------

533 // Metric

534

535 static inline int GetSSE(const uint8_t* a, const uint8_t* b, int w, int h) {

536 int count = 0;

537 int y, x;

538 for (y = 0; y < h; ++y) {

539 for (x = 0; x < w; ++x) {

540 const int diff = (int)a[x] - b[x];

541 count += diff * diff;

542 }

543 a += BPS;

544 b += BPS;

545 }

546 return count;

547 }

548

// Fixed-size entry points over GetSSE(); the name gives width x height.
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
  const int width = 16, height = 16;
  return GetSSE(a, b, width, height);
}

static int SSE16x8(const uint8_t* a, const uint8_t* b) {
  const int width = 16, height = 8;
  return GetSSE(a, b, width, height);
}

static int SSE8x8(const uint8_t* a, const uint8_t* b) {
  const int width = 8, height = 8;
  return GetSSE(a, b, width, height);
}

static int SSE4x4(const uint8_t* a, const uint8_t* b) {
  const int width = 4, height = 4;
  return GetSSE(a, b, width, height);
}

561

562 //------------------------------------------------------------------------------

563 // Texture distortion

564 //

565 // We try to match the spectral content (weighted) between source and

566 // reconstructed samples.

567

568 // Hadamard transform

569 // Returns the weighted sum of the absolute value of transformed coefficients.

570 static int TTransform(const uint8_t* in, const uint16_t* w) {

571 int sum = 0;

572 int tmp[16];

573 int i;

574 // horizontal pass

575 for (i = 0; i < 4; ++i, in += BPS) {

576 const int a0 = (in[0] + in[2]) << 2;

577 const int a1 = (in[1] + in[3]) << 2;

578 const int a2 = (in[1] - in[3]) << 2;

579 const int a3 = (in[0] - in[2]) << 2;

580 tmp[0 + i * 4] = a0 + a1 + (a0 != 0);

581 tmp[1 + i * 4] = a3 + a2;

582 tmp[2 + i * 4] = a3 - a2;

583 tmp[3 + i * 4] = a0 - a1;

584 }

585 // vertical pass

586 for (i = 0; i < 4; ++i, ++w) {

587 const int a0 = (tmp[0 + i] + tmp[8 + i]);

588 const int a1 = (tmp[4 + i] + tmp[12+ i]);

589 const int a2 = (tmp[4 + i] - tmp[12+ i]);

590 const int a3 = (tmp[0 + i] - tmp[8 + i]);

591 const int b0 = a0 + a1;

592 const int b1 = a3 + a2;

593 const int b2 = a3 - a2;

594 const int b3 = a0 - a1;

595 // abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3

596 sum += w[ 0] * ((abs(b0) + 3) >> 3);

597 sum += w[ 4] * ((abs(b1) + 3) >> 3);

598 sum += w[ 8] * ((abs(b2) + 3) >> 3);

599 sum += w[12] * ((abs(b3) + 3) >> 3);

600 }

601 return sum;

602 }

603

// Texture distortion between two 4x4 blocks: the absolute difference of
// their weighted TTransform() sums, rounded and divided by 16.
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
                    const uint16_t* const w) {
  const int energy_a = TTransform(a, w);
  const int energy_b = TTransform(b, w);
  const int gap = abs(energy_b - energy_a);
  return (gap + 8) >> 4;
}

610

611 static int Disto16x16(const uint8_t* const a, const uint8_t* const b,

612 const uint16_t* const w) {

613 int D = 0;

614 int x, y;

615 for (y = 0; y < 16 * BPS; y += 4 * BPS) {

616 for (x = 0; x < 16; x += 4) {

617 D += Disto4x4(a + x + y, b + x + y, w);

618 }

619 }

620 return D;

621 }

622

623 //------------------------------------------------------------------------------

624 // Quantization

625 //

626

627 // Simple quantization

628 static int QuantizeBlock(int16_t in[16], int16_t out[16],

629 int n, const VP8Matrix* const mtx) {

630 int last = -1;

631 for (; n < 16; ++n) {

632 const int j = VP8Zigzag[n];

633 const int sign = (in[j] < 0);

634 int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];

635 if (coeff > 2047) coeff = 2047;

636 if (coeff > mtx->zthresh_[j]) {

637 const int Q = mtx->q_[j];

638 const int iQ = mtx->iq_[j];

639 const int B = mtx->bias_[j];

640 out[n] = QUANTDIV(coeff, iQ, B);

641 if (sign) out[n] = -out[n];

642 in[j] = out[n] * Q;

643 if (out[n]) last = n;

644 } else {

645 out[n] = 0;

646 in[j] = 0;

647 }

648 }

649 return (last >= 0);

650 }

651

652 //------------------------------------------------------------------------------

653 // Block copy

654

655 static inline void Copy(const uint8_t* src, uint8_t* dst, int size) {

656 int y;

657 for (y = 0; y < size; ++y) {

658 memcpy(dst, src, size);

659 src += BPS;

660 dst += BPS;

661 }

662 }

663

// Fixed-size entry points over Copy(); the name gives the block size.
static void Copy4x4(const uint8_t* src, uint8_t* dst) {
  Copy(src, dst, 4);
}

static void Copy8x8(const uint8_t* src, uint8_t* dst) {
  Copy(src, dst, 8);
}

static void Copy16x16(const uint8_t* src, uint8_t* dst) {
  Copy(src, dst, 16);
}

667

668 //------------------------------------------------------------------------------

669 // SSE2 detection.

670 //

671

// GetCPUInfo(cpu_info, info_type): executes CPUID with eax = info_type and
// stores the resulting eax, ebx, ecx, edx in cpu_info[0..3].
#if defined(__pic__) && defined(__i386__)
// i386 PIC variant: ebx is saved in edi around the cpuid instruction and
// restored afterwards, so the ebx result is returned through edi.
// NOTE(review): presumably because ebx is reserved in i386 PIC code.
static inline void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "mov %%ebx, %%edi\n"
    "cpuid\n"
    "xchg %%edi, %%ebx\n"
    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type));
}
#elif defined(__i386__) || defined(__x86_64__)
static inline void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "cpuid\n"
    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type));
}
#elif defined(_MSC_VER)  // Visual C++
#define GetCPUInfo __cpuid
#endif

691

692 #if defined(__i386__) \|\| defined(__x86_64__) \|\| defined(_MSC_VER)

693 static int x86CPUInfo(CPUFeature feature) {

694 int cpu_info[4];

695 GetCPUInfo(cpu_info, 1);

696 if (feature == kSSE2) {

697 return 0 != (cpu_info[3] & 0x04000000);

698 }

699 if (feature == kSSE3) {

700 return 0 != (cpu_info[2] & 0x00000001);

701 }

702 return 0;

703 }

704 VP8CPUInfo VP8EncGetCPUInfo = x86CPUInfo;

705 #else

706 VP8CPUInfo VP8EncGetCPUInfo = NULL;

707 #endif

708

709 // Speed-critical function pointers. We have to initialize them to the default

710 // implementations within VP8EncDspInit().

711 VP8CHisto VP8CollectHistogram;

712 VP8Idct VP8ITransform;

713 VP8Fdct VP8FTransform;

714 VP8WHT VP8ITransformWHT;

715 VP8WHT VP8FTransformWHT;

716 VP8Intra4Preds VP8EncPredLuma4;

717 VP8IntraPreds VP8EncPredLuma16;

718 VP8IntraPreds VP8EncPredChroma8;

719 VP8Metric VP8SSE16x16;

720 VP8Metric VP8SSE8x8;

721 VP8Metric VP8SSE16x8;

722 VP8Metric VP8SSE4x4;

723 VP8WMetric VP8TDisto4x4;

724 VP8WMetric VP8TDisto16x16;

725 VP8QuantizeBlock VP8EncQuantizeBlock;

726 VP8BlockCopy VP8Copy4x4;

727 VP8BlockCopy VP8Copy8x8;

728 VP8BlockCopy VP8Copy16x16;

729

730 extern void VP8EncDspInitSSE2(void);

731

732 void VP8EncDspInit(void) {

733 InitTables();

734

735 // default C implementations

736 VP8CollectHistogram = CollectHistogram;

737 VP8ITransform = ITransform;

738 VP8FTransform = FTransform;

739 VP8ITransformWHT = ITransformWHT;

740 VP8FTransformWHT = FTransformWHT;

741 VP8EncPredLuma4 = Intra4Preds;

742 VP8EncPredLuma16 = Intra16Preds;

743 VP8EncPredChroma8 = IntraChromaPreds;

744 VP8SSE16x16 = SSE16x16;

745 VP8SSE8x8 = SSE8x8;

746 VP8SSE16x8 = SSE16x8;

747 VP8SSE4x4 = SSE4x4;

748 VP8TDisto4x4 = Disto4x4;

749 VP8TDisto16x16 = Disto16x16;

750 VP8EncQuantizeBlock = QuantizeBlock;

751 VP8Copy4x4 = Copy4x4;

752 VP8Copy8x8 = Copy8x8;

753 VP8Copy16x16 = Copy16x16;

754

755 // If defined, use CPUInfo() to overwrite some pointers with faster versions.

756 if (VP8EncGetCPUInfo) {

757 if (VP8EncGetCPUInfo(kSSE2)) {

758 #if defined(__SSE2__) \|\| defined(_MSC_VER)

759 VP8EncDspInitSSE2();

760 #endif

761 }

762 if (VP8EncGetCPUInfo(kSSE3)) {

763 // later we'll plug some SSE3 variant here

764 }

765 }

766 }

767

768 #if defined(__cplusplus) || defined(c_plusplus)

769 } // extern "C"

770 #endif

OLD	NEW

« no previous file with comments | « no previous file | third_party/libwebp/enc/enc_sse2.c » ('j') | no next file with comments »