OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "cc/resources/video_resource_updater.h" | 5 #include "cc/resources/video_resource_updater.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <stdint.h> | 8 #include <stdint.h> |
9 | 9 |
10 #include <algorithm> | 10 #include <algorithm> |
(...skipping 278 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
289 if (software_compositor) | 289 if (software_compositor) |
290 return coded_size; | 290 return coded_size; |
291 | 291 |
292 int plane_width = media::VideoFrame::Columns( | 292 int plane_width = media::VideoFrame::Columns( |
293 plane_index, input_frame->format(), coded_size.width()); | 293 plane_index, input_frame->format(), coded_size.width()); |
294 int plane_height = media::VideoFrame::Rows(plane_index, input_frame->format(), | 294 int plane_height = media::VideoFrame::Rows(plane_index, input_frame->format(), |
295 coded_size.height()); | 295 coded_size.height()); |
296 return gfx::Size(plane_width, plane_height); | 296 return gfx::Size(plane_width, plane_height); |
297 } | 297 } |
298 | 298 |
299 void VideoResourceUpdater::MakeHalfFloats(const uint16_t* src, | 299 namespace { |
300 int bits_per_channel, | 300 // By OR-ing with 0x3800, 10-bit numbers become half-floats in the |
301 size_t num, | 301 // range [0.5..1) and 9-bit numbers get the range [0.5..0.75). |
302 uint16_t* dst) { | 302 // |
303 // Source and dest stride can be zero since we're only copying | 303 // Half-floats are evaluated as: |
304 // one row at a time. | 304 // float value = pow(2.0, exponent - 25) * (0x400 + fraction); |
305 int stride = 0; | 305 // |
306 // Maximum value used in |src|. | 306 // In our case the exponent is 14 (since we or with 0x3800) and |
307 int max_value = (1 << bits_per_channel) - 1; | 307 // pow(2.0, 14-25) * 0x400 evaluates to 0.5 (our offset) and |
308 int rows = 1; | 308 // pow(2.0, 14-25) * fraction is [0..0.49951171875] for 10-bit and |
309 libyuv::HalfFloatPlane(src, stride, dst, stride, 1.0f / max_value, num, rows); | 309 // [0..0.24951171875] for 9-bit. |
310 // | |
311 // https://en.wikipedia.org/wiki/Half-precision_floating-point_format | |
312 class HalfFloatMaker_xor : public VideoResourceUpdater::HalfFloatMaker { | |
313 public: | |
314 explicit HalfFloatMaker_xor(int bits_per_channel) | |
315 : bits_per_channel_(bits_per_channel) {} | |
316 float Offset() const override { return 0.5; } | |
317 float Multiplier() const override { | |
318 int max_input_value = (1 << bits_per_channel_) - 1; | |
319 // 2 << 11 = 2048 would be 1.0 with our exponent. | |
320 return 2048.0 / max_input_value; | |
321 } | |
322 void MakeHalfFloats(const uint16_t* src, size_t num, uint16_t* dst) override { | |
323 // Micro-benchmarking indicates that the compiler does | |
324 // a good enough job of optimizing this loop that trying | |
325 // to manually operate on one uint64 at a time is not | |
326 // actually helpful. | |
327 // Note to future optimizers: Benchmark your optimizations! | |
328 for (size_t i = 0; i < num; i++) | |
329 dst[i] = src[i] | 0x3800; | |
fbarchard1
2016/10/21 22:46:06
this will be slow for compilers that don't vectorize…
hubbe
2016/10/21 22:55:04
Can we get rid of this code and move the decision
| |
330 } | |
331 | |
332 private: | |
333 int bits_per_channel_; | |
334 }; | |
335 | |
336 class HalfFloatMaker_libyuv : public VideoResourceUpdater::HalfFloatMaker { | |
337 public: | |
338 explicit HalfFloatMaker_libyuv(int bits_per_channel) { | |
339 int max_value = (1 << bits_per_channel) - 1; | |
340 // For less than 15 bits, we can give libyuv a multiplier of | |
341 // 1.0, which is faster on some platforms. If bits is 16 or larger, | |
342 // a multiplier of 1.0 would cause overflows. However, a multiplier | |
343 // of 1/max_value would cause subnormal floats, which perform | |
344 // very poorly on some platforms. | |
345 if (bits_per_channel <= 15) { | |
346 libyuv_multiplier_ = 1.0f; | |
347 } else { | |
348 // This multiplier makes sure that we avoid subnormal values. | |
349 libyuv_multiplier_ = 1.0f / 4096.0f; | |
fbarchard1
2016/10/21 22:46:06
have you tested using 1.0?
values near 65535 will
hubbe
2016/10/21 22:55:04
I have not tested the inf case.
The smallest poss
| |
350 } | |
351 resource_multiplier_ = 1.0f / libyuv_multiplier_ / max_value; | |
352 } | |
353 float Offset() const override { return 0.0f; } | |
354 float Multiplier() const override { return resource_multiplier_; } | |
355 void MakeHalfFloats(const uint16_t* src, size_t num, uint16_t* dst) override { | |
356 // Source and dest stride can be zero since we're only copying | |
357 // one row at a time. | |
358 int stride = 0; | |
359 int rows = 1; | |
360 libyuv::HalfFloatPlane(src, stride, dst, stride, libyuv_multiplier_, num, | |
361 rows); | |
362 } | |
363 | |
364 private: | |
365 float libyuv_multiplier_; | |
366 float resource_multiplier_; | |
367 }; | |
368 | |
369 } // namespace | |
370 | |
371 std::unique_ptr<VideoResourceUpdater::HalfFloatMaker> | |
372 VideoResourceUpdater::NewHalfFloatMaker(int bits_per_channel) { | |
373 if (bits_per_channel < 11) { | |
374 return std::unique_ptr<VideoResourceUpdater::HalfFloatMaker>( | |
375 new HalfFloatMaker_xor(bits_per_channel)); | |
376 } else { | |
377 return std::unique_ptr<VideoResourceUpdater::HalfFloatMaker>( | |
378 new HalfFloatMaker_libyuv(bits_per_channel)); | |
379 } | |
310 } | 380 } |
311 | 381 |
312 VideoFrameExternalResources VideoResourceUpdater::CreateForSoftwarePlanes( | 382 VideoFrameExternalResources VideoResourceUpdater::CreateForSoftwarePlanes( |
313 scoped_refptr<media::VideoFrame> video_frame) { | 383 scoped_refptr<media::VideoFrame> video_frame) { |
314 TRACE_EVENT0("cc", "VideoResourceUpdater::CreateForSoftwarePlanes"); | 384 TRACE_EVENT0("cc", "VideoResourceUpdater::CreateForSoftwarePlanes"); |
315 const media::VideoPixelFormat input_frame_format = video_frame->format(); | 385 const media::VideoPixelFormat input_frame_format = video_frame->format(); |
316 | 386 |
317 // TODO(hubbe): Make this a video frame method. | 387 // TODO(hubbe): Make this a video frame method. |
318 int bits_per_channel = 0; | 388 int bits_per_channel = 0; |
319 switch (input_frame_format) { | 389 switch (input_frame_format) { |
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
477 plane_resource.resource_id())); | 547 plane_resource.resource_id())); |
478 mailbox.set_color_space(video_frame->ColorSpace()); | 548 mailbox.set_color_space(video_frame->ColorSpace()); |
479 external_resources.mailboxes.push_back(mailbox); | 549 external_resources.mailboxes.push_back(mailbox); |
480 external_resources.release_callbacks.push_back(base::Bind( | 550 external_resources.release_callbacks.push_back(base::Bind( |
481 &RecycleResource, AsWeakPtr(), plane_resource.resource_id())); | 551 &RecycleResource, AsWeakPtr(), plane_resource.resource_id())); |
482 external_resources.type = VideoFrameExternalResources::RGBA_RESOURCE; | 552 external_resources.type = VideoFrameExternalResources::RGBA_RESOURCE; |
483 } | 553 } |
484 return external_resources; | 554 return external_resources; |
485 } | 555 } |
486 | 556 |
557 std::unique_ptr<HalfFloatMaker> half_float_maker; | |
558 if (resource_provider_->YuvResourceFormat(bits_per_channel) == | |
559 LUMINANCE_F16) { | |
560 half_float_maker = NewHalfFloatMaker(bits_per_channel); | |
561 external_resources.offset = half_float_maker->Offset(); | |
562 external_resources.multiplier = half_float_maker->Multiplier(); | |
563 } | |
564 | |
487 for (size_t i = 0; i < plane_resources.size(); ++i) { | 565 for (size_t i = 0; i < plane_resources.size(); ++i) { |
488 PlaneResource& plane_resource = *plane_resources[i]; | 566 PlaneResource& plane_resource = *plane_resources[i]; |
489 // Update each plane's resource id with its content. | 567 // Update each plane's resource id with its content. |
490 DCHECK_EQ(plane_resource.resource_format(), | 568 DCHECK_EQ(plane_resource.resource_format(), |
491 resource_provider_->YuvResourceFormat(bits_per_channel)); | 569 resource_provider_->YuvResourceFormat(bits_per_channel)); |
492 | 570 |
493 if (!plane_resource.Matches(video_frame->unique_id(), i)) { | 571 if (!plane_resource.Matches(video_frame->unique_id(), i)) { |
494 // TODO(hubbe): Move all conversion (and upload?) code to media/. | 572 // TODO(hubbe): Move all conversion (and upload?) code to media/. |
495 // We need to transfer data from |video_frame| to the plane resource. | 573 // We need to transfer data from |video_frame| to the plane resource. |
496 // TODO(reveman): Can use GpuMemoryBuffers here to improve performance. | 574 // TODO(reveman): Can use GpuMemoryBuffers here to improve performance. |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
531 upload_image_stride * resource_size_pixels.height(); | 609 upload_image_stride * resource_size_pixels.height(); |
532 if (upload_pixels_.size() < needed_size) | 610 if (upload_pixels_.size() < needed_size) |
533 upload_pixels_.resize(needed_size); | 611 upload_pixels_.resize(needed_size); |
534 | 612 |
535 for (int row = 0; row < resource_size_pixels.height(); ++row) { | 613 for (int row = 0; row < resource_size_pixels.height(); ++row) { |
536 if (plane_resource.resource_format() == LUMINANCE_F16) { | 614 if (plane_resource.resource_format() == LUMINANCE_F16) { |
537 uint16_t* dst = reinterpret_cast<uint16_t*>( | 615 uint16_t* dst = reinterpret_cast<uint16_t*>( |
538 &upload_pixels_[upload_image_stride * row]); | 616 &upload_pixels_[upload_image_stride * row]); |
539 const uint16_t* src = reinterpret_cast<uint16_t*>( | 617 const uint16_t* src = reinterpret_cast<uint16_t*>( |
540 video_frame->data(i) + (video_stride_bytes * row)); | 618 video_frame->data(i) + (video_stride_bytes * row)); |
541 if (bits_per_channel <= 10) { | 619 half_float_maker->MakeHalfFloats(src, bytes_per_row / 2, dst); |
fbarchard1
2016/10/21 22:46:06
libyuv::HalfFloatPlane has some overhead to detect…
hubbe
2016/10/21 22:55:04
Seems reasonable, but I would prefer to fix that a
| |
542 // Micro-benchmarking indicates that the compiler does | |
543 // a good enough job of optimizing this loop that trying | |
544 // to manually operate on one uint64 at a time is not | |
545 // actually helpful. | |
546 // Note to future optimizers: Benchmark your optimizations! | |
547 for (size_t i = 0; i < bytes_per_row / 2; i++) | |
548 dst[i] = src[i] | 0x3800; | |
549 } else { | |
550 MakeHalfFloats(src, bits_per_channel, bytes_per_row / 2, dst); | |
551 } | |
552 } else if (shift != 0) { | 620 } else if (shift != 0) { |
553 // We have more-than-8-bit input which we need to shift | 621 // We have more-than-8-bit input which we need to shift |
554 // down to fit it into an 8-bit texture. | 622 // down to fit it into an 8-bit texture. |
555 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; | 623 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; |
556 const uint16_t* src = reinterpret_cast<uint16_t*>( | 624 const uint16_t* src = reinterpret_cast<uint16_t*>( |
557 video_frame->data(i) + (video_stride_bytes * row)); | 625 video_frame->data(i) + (video_stride_bytes * row)); |
558 for (size_t i = 0; i < bytes_per_row; i++) | 626 for (size_t i = 0; i < bytes_per_row; i++) |
559 dst[i] = src[i] >> shift; | 627 dst[i] = src[i] >> shift; |
560 } else { | 628 } else { |
561 // Input and output are the same size and format, but | 629 // Input and output are the same size and format, but |
562 // differ in stride, copy one row at a time. | 630 // differ in stride, copy one row at a time. |
563 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; | 631 uint8_t* dst = &upload_pixels_[upload_image_stride * row]; |
564 const uint8_t* src = | 632 const uint8_t* src = |
565 video_frame->data(i) + (video_stride_bytes * row); | 633 video_frame->data(i) + (video_stride_bytes * row); |
566 memcpy(dst, src, bytes_per_row); | 634 memcpy(dst, src, bytes_per_row); |
567 } | 635 } |
568 } | 636 } |
569 pixels = &upload_pixels_[0]; | 637 pixels = &upload_pixels_[0]; |
570 } | 638 } |
571 | 639 |
572 resource_provider_->CopyToResource(plane_resource.resource_id(), pixels, | 640 resource_provider_->CopyToResource(plane_resource.resource_id(), pixels, |
573 resource_size_pixels); | 641 resource_size_pixels); |
574 plane_resource.SetUniqueId(video_frame->unique_id(), i); | 642 plane_resource.SetUniqueId(video_frame->unique_id(), i); |
575 } | 643 } |
576 | 644 |
577 if (plane_resource.resource_format() == LUMINANCE_F16) { | |
578 // If the input data was 9 or 10 bit, and we output to half-floats, | |
579 // then we used the OR path above, which means that we need to | |
580 // adjust the resource offset and multiplier accordingly. If the | |
581 // input data uses more than 10 bits, it will already be normalized | |
582 // to 0.0..1.0, so there is no need to do anything. | |
583 if (bits_per_channel <= 10) { | |
584 // By OR-ing with 0x3800, 10-bit numbers become half-floats in the | |
585 // range [0.5..1) and 9-bit numbers get the range [0.5..0.75). | |
586 // | |
587 // Half-floats are evaluated as: | |
588 // float value = pow(2.0, exponent - 25) * (0x400 + fraction); | |
589 // | |
590 // In our case the exponent is 14 (since we or with 0x3800) and | |
591 // pow(2.0, 14-25) * 0x400 evaluates to 0.5 (our offset) and | |
592 // pow(2.0, 14-25) * fraction is [0..0.49951171875] for 10-bit and | |
593 // [0..0.24951171875] for 9-bit. | |
594 // | |
595 // https://en.wikipedia.org/wiki/Half-precision_floating-point_format | |
596 // | |
597 // PLEASE NOTE: | |
598 // All planes are assumed to use the same multiplier/offset. | |
599 external_resources.offset = 0.5f; | |
600 // Max value from input data. | |
601 int max_input_value = (1 << bits_per_channel) - 1; | |
602 // 2 << 11 = 2048 would be 1.0 with our exponent. | |
603 external_resources.multiplier = 2048.0 / max_input_value; | |
604 } | |
605 } | |
606 | |
607 // VideoResourceUpdater shares a context with the compositor so a | 645 // VideoResourceUpdater shares a context with the compositor so a |
608 // sync token is not required. | 646 // sync token is not required. |
609 TextureMailbox mailbox(plane_resource.mailbox(), gpu::SyncToken(), | 647 TextureMailbox mailbox(plane_resource.mailbox(), gpu::SyncToken(), |
610 resource_provider_->GetResourceTextureTarget( | 648 resource_provider_->GetResourceTextureTarget( |
611 plane_resource.resource_id())); | 649 plane_resource.resource_id())); |
612 mailbox.set_color_space(video_frame->ColorSpace()); | 650 mailbox.set_color_space(video_frame->ColorSpace()); |
613 external_resources.mailboxes.push_back(mailbox); | 651 external_resources.mailboxes.push_back(mailbox); |
614 external_resources.release_callbacks.push_back(base::Bind( | 652 external_resources.release_callbacks.push_back(base::Bind( |
615 &RecycleResource, AsWeakPtr(), plane_resource.resource_id())); | 653 &RecycleResource, AsWeakPtr(), plane_resource.resource_id())); |
616 } | 654 } |
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
763 if (lost_resource) { | 801 if (lost_resource) { |
764 resource_it->clear_refs(); | 802 resource_it->clear_refs(); |
765 updater->DeleteResource(resource_it); | 803 updater->DeleteResource(resource_it); |
766 return; | 804 return; |
767 } | 805 } |
768 | 806 |
769 resource_it->remove_ref(); | 807 resource_it->remove_ref(); |
770 } | 808 } |
771 | 809 |
772 } // namespace cc | 810 } // namespace cc |
OLD | NEW |