Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1282)

Side by Side Diff: cc/resources/video_resource_updater.cc

Issue 2444463002: Change half-float conversion to use 1.0 multiplier (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "cc/resources/video_resource_updater.h" 5 #include "cc/resources/video_resource_updater.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 #include <stdint.h> 8 #include <stdint.h>
9 9
10 #include <algorithm> 10 #include <algorithm>
(...skipping 278 matching lines...) Expand 10 before | Expand all | Expand 10 after
289 if (software_compositor) 289 if (software_compositor)
290 return coded_size; 290 return coded_size;
291 291
292 int plane_width = media::VideoFrame::Columns( 292 int plane_width = media::VideoFrame::Columns(
293 plane_index, input_frame->format(), coded_size.width()); 293 plane_index, input_frame->format(), coded_size.width());
294 int plane_height = media::VideoFrame::Rows(plane_index, input_frame->format(), 294 int plane_height = media::VideoFrame::Rows(plane_index, input_frame->format(),
295 coded_size.height()); 295 coded_size.height());
296 return gfx::Size(plane_width, plane_height); 296 return gfx::Size(plane_width, plane_height);
297 } 297 }
298 298
299 void VideoResourceUpdater::MakeHalfFloats(const uint16_t* src, 299 namespace {
300 int bits_per_channel, 300 // By OR-ing with 0x3800, 10-bit numbers become half-floats in the
301 size_t num, 301 // range [0.5..1) and 9-bit numbers get the range [0.5..0.75).
302 uint16_t* dst) { 302 //
303 // Source and dest stride can be zero since we're only copying 303 // Half-floats are evaluated as:
304 // one row at a time. 304 // float value = pow(2.0, exponent - 25) * (0x400 + fraction);
305 int stride = 0; 305 //
306 // Maximum value used in |src|. 306 // In our case the exponent is 14 (since we or with 0x3800) and
307 int max_value = (1 << bits_per_channel) - 1; 307 // pow(2.0, 14-25) * 0x400 evaluates to 0.5 (our offset) and
308 int rows = 1; 308 // pow(2.0, 14-25) * fraction is [0..0.49951171875] for 10-bit and
309 libyuv::HalfFloatPlane(src, stride, dst, stride, 1.0f / max_value, num, rows); 309 // [0..0.24951171875] for 9-bit.
310 //
311 // https://en.wikipedia.org/wiki/Half-precision_floating-point_format
312 class HalfFloatMaker_xor : public VideoResourceUpdater::HalfFloatMaker {
313 public:
314 explicit HalfFloatMaker_xor(int bits_per_channel)
315 : bits_per_channel_(bits_per_channel) {}
316 float Offset() const override { return 0.5; }
317 float Multiplier() const override {
318 int max_input_value = (1 << bits_per_channel_) - 1;
319 // 1 << 11 = 2048 would be 1.0 with our exponent.
320 return 2048.0 / max_input_value;
321 }
322 void MakeHalfFloats(const uint16_t* src, size_t num, uint16_t* dst) override {
323 // Micro-benchmarking indicates that the compiler does
324 // a good enough job of optimizing this loop that trying
325 // to manually operate on one uint64 at a time is not
326 // actually helpful.
327 // Note to future optimizers: Benchmark your optimizations!
328 for (size_t i = 0; i < num; i++)
329 dst[i] = src[i] | 0x3800;
fbarchard1 2016/10/21 22:46:06 this will be slow for compilers that don't vectoriz
hubbe 2016/10/21 22:55:04 Can we get rid of this code and move the decision
330 }
331
332 private:
333 int bits_per_channel_;
334 };
335
336 class HalfFloatMaker_libyuv : public VideoResourceUpdater::HalfFloatMaker {
337 public:
338 explicit HalfFloatMaker_libyuv(int bits_per_channel) {
339 int max_value = (1 << bits_per_channel) - 1;
340 // For 15 bits or fewer, we can give libyuv a multiplier of
341 // 1.0, which is faster on some platforms. If bits is 16 or larger,
342 // a multiplier of 1.0 would cause overflows. However, a multiplier
343 // of 1/max_value would cause subnormal floats, which perform
344 // very poorly on some platforms.
345 if (bits_per_channel <= 15) {
346 libyuv_multiplier_ = 1.0f;
347 } else {
348 // This multiplier makes sure that we avoid subnormal values.
349 libyuv_multiplier_ = 1.0f / 4096.0f;
fbarchard1 2016/10/21 22:46:06 have you tested using 1.0? values near 65535 will
hubbe 2016/10/21 22:55:04 I have not tested the inf case. The smallest poss
350 }
351 resource_multiplier_ = 1.0f / libyuv_multiplier_ / max_value;
352 }
353 float Offset() const override { return 0.0f; }
354 float Multiplier() const override { return resource_multiplier_; }
355 void MakeHalfFloats(const uint16_t* src, size_t num, uint16_t* dst) override {
356 // Source and dest stride can be zero since we're only copying
357 // one row at a time.
358 int stride = 0;
359 int rows = 1;
360 libyuv::HalfFloatPlane(src, stride, dst, stride, libyuv_multiplier_, num,
361 rows);
362 }
363
364 private:
365 float libyuv_multiplier_;
366 float resource_multiplier_;
367 };
368
369 } // namespace
370
371 std::unique_ptr<VideoResourceUpdater::HalfFloatMaker>
372 VideoResourceUpdater::NewHalfFloatMaker(int bits_per_channel) {
373 if (bits_per_channel < 11) {
374 return std::unique_ptr<VideoResourceUpdater::HalfFloatMaker>(
375 new HalfFloatMaker_xor(bits_per_channel));
376 } else {
377 return std::unique_ptr<VideoResourceUpdater::HalfFloatMaker>(
378 new HalfFloatMaker_libyuv(bits_per_channel));
379 }
310 } 380 }
311 381
312 VideoFrameExternalResources VideoResourceUpdater::CreateForSoftwarePlanes( 382 VideoFrameExternalResources VideoResourceUpdater::CreateForSoftwarePlanes(
313 scoped_refptr<media::VideoFrame> video_frame) { 383 scoped_refptr<media::VideoFrame> video_frame) {
314 TRACE_EVENT0("cc", "VideoResourceUpdater::CreateForSoftwarePlanes"); 384 TRACE_EVENT0("cc", "VideoResourceUpdater::CreateForSoftwarePlanes");
315 const media::VideoPixelFormat input_frame_format = video_frame->format(); 385 const media::VideoPixelFormat input_frame_format = video_frame->format();
316 386
317 // TODO(hubbe): Make this a video frame method. 387 // TODO(hubbe): Make this a video frame method.
318 int bits_per_channel = 0; 388 int bits_per_channel = 0;
319 switch (input_frame_format) { 389 switch (input_frame_format) {
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after
477 plane_resource.resource_id())); 547 plane_resource.resource_id()));
478 mailbox.set_color_space(video_frame->ColorSpace()); 548 mailbox.set_color_space(video_frame->ColorSpace());
479 external_resources.mailboxes.push_back(mailbox); 549 external_resources.mailboxes.push_back(mailbox);
480 external_resources.release_callbacks.push_back(base::Bind( 550 external_resources.release_callbacks.push_back(base::Bind(
481 &RecycleResource, AsWeakPtr(), plane_resource.resource_id())); 551 &RecycleResource, AsWeakPtr(), plane_resource.resource_id()));
482 external_resources.type = VideoFrameExternalResources::RGBA_RESOURCE; 552 external_resources.type = VideoFrameExternalResources::RGBA_RESOURCE;
483 } 553 }
484 return external_resources; 554 return external_resources;
485 } 555 }
486 556
557 std::unique_ptr<HalfFloatMaker> half_float_maker;
558 if (resource_provider_->YuvResourceFormat(bits_per_channel) ==
559 LUMINANCE_F16) {
560 half_float_maker = NewHalfFloatMaker(bits_per_channel);
561 external_resources.offset = half_float_maker->Offset();
562 external_resources.multiplier = half_float_maker->Multiplier();
563 }
564
487 for (size_t i = 0; i < plane_resources.size(); ++i) { 565 for (size_t i = 0; i < plane_resources.size(); ++i) {
488 PlaneResource& plane_resource = *plane_resources[i]; 566 PlaneResource& plane_resource = *plane_resources[i];
489 // Update each plane's resource id with its content. 567 // Update each plane's resource id with its content.
490 DCHECK_EQ(plane_resource.resource_format(), 568 DCHECK_EQ(plane_resource.resource_format(),
491 resource_provider_->YuvResourceFormat(bits_per_channel)); 569 resource_provider_->YuvResourceFormat(bits_per_channel));
492 570
493 if (!plane_resource.Matches(video_frame->unique_id(), i)) { 571 if (!plane_resource.Matches(video_frame->unique_id(), i)) {
494 // TODO(hubbe): Move all conversion (and upload?) code to media/. 572 // TODO(hubbe): Move all conversion (and upload?) code to media/.
495 // We need to transfer data from |video_frame| to the plane resource. 573 // We need to transfer data from |video_frame| to the plane resource.
496 // TODO(reveman): Can use GpuMemoryBuffers here to improve performance. 574 // TODO(reveman): Can use GpuMemoryBuffers here to improve performance.
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
531 upload_image_stride * resource_size_pixels.height(); 609 upload_image_stride * resource_size_pixels.height();
532 if (upload_pixels_.size() < needed_size) 610 if (upload_pixels_.size() < needed_size)
533 upload_pixels_.resize(needed_size); 611 upload_pixels_.resize(needed_size);
534 612
535 for (int row = 0; row < resource_size_pixels.height(); ++row) { 613 for (int row = 0; row < resource_size_pixels.height(); ++row) {
536 if (plane_resource.resource_format() == LUMINANCE_F16) { 614 if (plane_resource.resource_format() == LUMINANCE_F16) {
537 uint16_t* dst = reinterpret_cast<uint16_t*>( 615 uint16_t* dst = reinterpret_cast<uint16_t*>(
538 &upload_pixels_[upload_image_stride * row]); 616 &upload_pixels_[upload_image_stride * row]);
539 const uint16_t* src = reinterpret_cast<uint16_t*>( 617 const uint16_t* src = reinterpret_cast<uint16_t*>(
540 video_frame->data(i) + (video_stride_bytes * row)); 618 video_frame->data(i) + (video_stride_bytes * row));
541 if (bits_per_channel <= 10) { 619 half_float_maker->MakeHalfFloats(src, bytes_per_row / 2, dst);
fbarchard1 2016/10/21 22:46:06 libyuv::HalfFloatPlane has some overhead to detect
hubbe 2016/10/21 22:55:04 Seems reasonable, but I would prefer to fix that a
542 // Micro-benchmarking indicates that the compiler does
543 // a good enough job of optimizing this loop that trying
544 // to manually operate on one uint64 at a time is not
545 // actually helpful.
546 // Note to future optimizers: Benchmark your optimizations!
547 for (size_t i = 0; i < bytes_per_row / 2; i++)
548 dst[i] = src[i] | 0x3800;
549 } else {
550 MakeHalfFloats(src, bits_per_channel, bytes_per_row / 2, dst);
551 }
552 } else if (shift != 0) { 620 } else if (shift != 0) {
553 // We have more-than-8-bit input which we need to shift 621 // We have more-than-8-bit input which we need to shift
554 // down to fit it into an 8-bit texture. 622 // down to fit it into an 8-bit texture.
555 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; 623 uint8_t* dst = &upload_pixels_[upload_image_stride * row];
556 const uint16_t* src = reinterpret_cast<uint16_t*>( 624 const uint16_t* src = reinterpret_cast<uint16_t*>(
557 video_frame->data(i) + (video_stride_bytes * row)); 625 video_frame->data(i) + (video_stride_bytes * row));
558 for (size_t i = 0; i < bytes_per_row; i++) 626 for (size_t i = 0; i < bytes_per_row; i++)
559 dst[i] = src[i] >> shift; 627 dst[i] = src[i] >> shift;
560 } else { 628 } else {
561 // Input and output are the same size and format, but 629 // Input and output are the same size and format, but
562 // differ in stride, copy one row at a time. 630 // differ in stride, copy one row at a time.
563 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; 631 uint8_t* dst = &upload_pixels_[upload_image_stride * row];
564 const uint8_t* src = 632 const uint8_t* src =
565 video_frame->data(i) + (video_stride_bytes * row); 633 video_frame->data(i) + (video_stride_bytes * row);
566 memcpy(dst, src, bytes_per_row); 634 memcpy(dst, src, bytes_per_row);
567 } 635 }
568 } 636 }
569 pixels = &upload_pixels_[0]; 637 pixels = &upload_pixels_[0];
570 } 638 }
571 639
572 resource_provider_->CopyToResource(plane_resource.resource_id(), pixels, 640 resource_provider_->CopyToResource(plane_resource.resource_id(), pixels,
573 resource_size_pixels); 641 resource_size_pixels);
574 plane_resource.SetUniqueId(video_frame->unique_id(), i); 642 plane_resource.SetUniqueId(video_frame->unique_id(), i);
575 } 643 }
576 644
577 if (plane_resource.resource_format() == LUMINANCE_F16) {
578 // If the input data was 9 or 10 bit, and we output to half-floats,
579 // then we used the OR path above, which means that we need to
580 // adjust the resource offset and multiplier accordingly. If the
581 // input data uses more than 10 bits, it will already be normalized
582 // to 0.0..1.0, so there is no need to do anything.
583 if (bits_per_channel <= 10) {
584 // By OR-ing with 0x3800, 10-bit numbers become half-floats in the
585 // range [0.5..1) and 9-bit numbers get the range [0.5..0.75).
586 //
587 // Half-floats are evaluated as:
588 // float value = pow(2.0, exponent - 25) * (0x400 + fraction);
589 //
590 // In our case the exponent is 14 (since we or with 0x3800) and
591 // pow(2.0, 14-25) * 0x400 evaluates to 0.5 (our offset) and
592 // pow(2.0, 14-25) * fraction is [0..0.49951171875] for 10-bit and
593 // [0..0.24951171875] for 9-bit.
594 //
595 // https://en.wikipedia.org/wiki/Half-precision_floating-point_format
596 //
597 // PLEASE NOTE:
598 // All planes are assumed to use the same multiplier/offset.
599 external_resources.offset = 0.5f;
600 // Max value from input data.
601 int max_input_value = (1 << bits_per_channel) - 1;
602 // 1 << 11 = 2048 would be 1.0 with our exponent.
603 external_resources.multiplier = 2048.0 / max_input_value;
604 }
605 }
606
607 // VideoResourceUpdater shares a context with the compositor so a 645 // VideoResourceUpdater shares a context with the compositor so a
608 // sync token is not required. 646 // sync token is not required.
609 TextureMailbox mailbox(plane_resource.mailbox(), gpu::SyncToken(), 647 TextureMailbox mailbox(plane_resource.mailbox(), gpu::SyncToken(),
610 resource_provider_->GetResourceTextureTarget( 648 resource_provider_->GetResourceTextureTarget(
611 plane_resource.resource_id())); 649 plane_resource.resource_id()));
612 mailbox.set_color_space(video_frame->ColorSpace()); 650 mailbox.set_color_space(video_frame->ColorSpace());
613 external_resources.mailboxes.push_back(mailbox); 651 external_resources.mailboxes.push_back(mailbox);
614 external_resources.release_callbacks.push_back(base::Bind( 652 external_resources.release_callbacks.push_back(base::Bind(
615 &RecycleResource, AsWeakPtr(), plane_resource.resource_id())); 653 &RecycleResource, AsWeakPtr(), plane_resource.resource_id()));
616 } 654 }
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after
763 if (lost_resource) { 801 if (lost_resource) {
764 resource_it->clear_refs(); 802 resource_it->clear_refs();
765 updater->DeleteResource(resource_it); 803 updater->DeleteResource(resource_it);
766 return; 804 return;
767 } 805 }
768 806
769 resource_it->remove_ref(); 807 resource_it->remove_ref();
770 } 808 }
771 809
772 } // namespace cc 810 } // namespace cc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698