content/common/gpu/media/v4l2_slice_video_decode_accelerator.h - Issue 833063003: Add accelerated video decoder interface, VP8 and H.264 implementations and hook up to V4L2SVDA.

Side by Side Diff: content/common/gpu/media/v4l2_slice_video_decode_accelerator.h

Issue 833063003: Add accelerated video decoder interface, VP8 and H.264 implementations and hook up to V4L2SVDA. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « content/common/gpu/media/v4l2_image_processor.cc ('k') | content/common/gpu/media/v4l2_slice_video_decode_accelerator.cc » ('j') | content/common/gpu/media/v4l2_slice_video_decode_accelerator.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright 2015 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #ifndef CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_

	6 #define CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_

	7

	8 #include <linux/videodev2.h>

	9 #include <queue>

	10 #include <vector>

	11

	12 #include "base/memory/linked_ptr.h"

	13 #include "base/memory/ref_counted.h"

	14 #include "base/memory/scoped_ptr.h"

	15 #include "base/memory/weak_ptr.h"

	16 #include "base/synchronization/waitable_event.h"

	17 #include "base/threading/thread.h"

	18 #include "content/common/content_export.h"

	19 #include "content/common/gpu/media/h264_decoder.h"

	20 #include "content/common/gpu/media/v4l2_video_device.h"

	21 #include "content/common/gpu/media/vp8_decoder.h"

	22 #include "media/video/video_decode_accelerator.h"

	23

	24 namespace content {

	25

	26 // An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice

	27 // level codec API for decoding. The slice level API provides only a low-level

	28 // decoding functionality and requires userspace to provide support for parsing

	29 // the input stream and managing decoder state across frames.

	30 class CONTENT_EXPORT V4L2SliceVideoDecodeAccelerator

	31 : public media::VideoDecodeAccelerator {

	32 public:

	// Reference-counted record describing one decode surface. It carries a
	// bitstream id, an input record value, an output record value, a config
	// store value, a "decoded" flag, a release callback and a list of
	// reference surfaces (see SetReferenceSurfaces()).
	33 class V4L2DecodeSurface : public base::RefCounted<V4L2DecodeSurface> {

	34 public:

	// Callback type taking a single int; an instance is stored in release_cb_.
	// NOTE(review): presumably run with the output record index when the
	// surface is released -- confirm against the .cc file.
	35 using ReleaseCB = base::Callback<void(int)>;

	36

	// NOTE(review): config_store_ is not a constructor argument; presumably it
	// is derived from one of the arguments in the .cc file -- confirm.
	37 V4L2DecodeSurface(int32 bitstream_id,

	38 int input_record,

	39 int output_record,

	40 const ReleaseCB& release_cb);

	41 virtual ~V4L2DecodeSurface();

	42

	43 // Mark the surface as decoded. This will also release all references, as

	44 // they are not needed anymore.

	45 void SetDecoded();

	// Returns the current value of the decoded flag.
	46 bool decoded() const { return decoded_; }

	47

	// Simple accessors returning the corresponding members.
	48 int32 bitstream_id() const { return bitstream_id_; }

	49 int input_record() const { return input_record_; }

	50 int output_record() const { return output_record_; }

	51 uint32_t config_store() const { return config_store_; }

	52

	53 // Take references to each reference surface and keep them until the

	54 // target surface is decoded.

	55 void SetReferenceSurfaces(

	56 const std::vector<scoped_refptr<V4L2DecodeSurface>>& ref_surfaces);

	57

	// Returns a string description of this surface (format defined in the .cc).
	58 std::string ToString() const;

	59

	60 private:

	61 int32 bitstream_id_;

	62 int input_record_;

	63 int output_record_;

	64 uint32_t config_store_;

	65

	// Value returned by decoded().
	66 bool decoded_;

	// Release callback of type ReleaseCB.
	67 ReleaseCB release_cb_;

	68

	// Surfaces referenced by this one; see SetReferenceSurfaces().
	69 std::vector<scoped_refptr<V4L2DecodeSurface>> reference_surfaces_;

	70

	71 DISALLOW_COPY_AND_ASSIGN(V4L2DecodeSurface);

	72 };

	73

	74 V4L2SliceVideoDecodeAccelerator(

	75 const scoped_refptr<V4L2Device>& device,

	76 EGLDisplay egl_display,

	77 EGLContext egl_context,

	78 const base::WeakPtr<Client>& io_client_,

	79 const base::Callback<bool(void)>& make_context_current,

	80 const scoped_refptr<base::MessageLoopProxy>& io_message_loop_proxy);

	81 virtual ~V4L2SliceVideoDecodeAccelerator();

	82

	83 // media::VideoDecodeAccelerator implementation.

	84 virtual bool Initialize(media::VideoCodecProfile profile,

	85 VideoDecodeAccelerator::Client* client) override;

	86 virtual void Decode(const media::BitstreamBuffer& bitstream_buffer) override;

	87 virtual void AssignPictureBuffers(

	88 const std::vector<media::PictureBuffer>& buffers) override;

	89 virtual void ReusePictureBuffer(int32 picture_buffer_id) override;

	90 virtual void Flush() override;

	91 virtual void Reset() override;

	92 virtual void Destroy() override;

	93 virtual bool CanDecodeOnIOThread() override;

	94

	95 bool SubmitSlice(int index, const uint8_t* data, size_t size);

	96 bool SubmitExtControls(struct v4l2_ext_controls* ext_ctrls);

	97

	98 private:

	// H264Decoder::H264Accelerator implementation used by
	// V4L2SliceVideoDecodeAccelerator. Holds a non-owning pointer to a
	// V4L2SliceVideoDecodeAccelerator (v4l2_dec_) together with per-frame V4L2
	// H.264 slice and decode parameter structures.
	99 class V4L2H264Accelerator : public H264Decoder::H264Accelerator {

	100 public:

	// |v4l2_dec| is stored as a raw pointer. The constructor is explicit so
	// that a raw decoder pointer is never implicitly converted into an
	// accelerator.
	101 explicit V4L2H264Accelerator(V4L2SliceVideoDecodeAccelerator* v4l2_dec);

	102 virtual ~V4L2H264Accelerator() {}

	103

	104 // H264Decoder::H264Accelerator implementation.

	105 scoped_refptr<H264Picture> CreateH264Picture() override;

	106

	107 bool SubmitFrameMetadata(const media::H264SPS* sps,

	108 const media::H264PPS* pps,

	109 const H264DPB& dpb,

	110 const H264Picture::Vector& ref_pic_listp0,

	111 const H264Picture::Vector& ref_pic_listb0,

	112 const H264Picture::Vector& ref_pic_listb1,

	113 const scoped_refptr<H264Picture>& pic) override;

	114

	115 bool SubmitSlice(const media::H264PPS* pps,

	116 const media::H264SliceHeader* slice_hdr,

	117 const H264Picture::Vector& ref_pic_list0,

	118 const H264Picture::Vector& ref_pic_list1,

	119 const scoped_refptr<H264Picture>& pic,

	120 const uint8_t* data,

	121 size_t size) override;

	122

	123 bool SubmitDecode(const scoped_refptr<H264Picture>& pic) override;

	124 bool OutputPicture(const scoped_refptr<H264Picture>& pic) override;

	125

	126 private:

	// Helper writing into the caller-provided |dst_list| array based on
	// |src_pic_list|.
	// NOTE(review): presumably stores one DPB index per picture, as the name
	// suggests -- confirm against the .cc file.
	127 void H264PictureListToDPBIndicesList(

	128 const H264Picture::Vector& src_pic_list,

	129 uint8_t dst_list[32]);

	130

	// Helper taking |dpb| and an output vector of decode surfaces.
	// NOTE(review): presumably collects the surfaces backing the DPB entries
	// into |ref_surfaces| -- confirm against the .cc file.
	131 void H264DPBToV4L2DPB(

	132 const H264DPB& dpb,

	133 std::vector<scoped_refptr<V4L2DecodeSurface>>* ref_surfaces);

	134

	// Returns the V4L2DecodeSurface associated with |pic|.
	// NOTE(review): behavior for a picture that is not a V4L2H264Picture is
	// not visible here -- confirm against the .cc file.
	135 scoped_refptr<V4L2DecodeSurface> H264PictureToV4L2DecodeSurface(

	136 const scoped_refptr<H264Picture>& pic);

	137

	// NOTE(review): presumably the number of slices submitted so far for the
	// current frame -- confirm.
	138 size_t num_slices_;

	// Not owned.
	139 V4L2SliceVideoDecodeAccelerator* v4l2_dec_;

	140

	141 // TODO(posciak): This should be queried from hardware once supported.

	142 static const size_t kMaxSlices = 16;

	// Slice parameter storage, sized for at most kMaxSlices slices.
	143 struct v4l2_ctrl_h264_slice_param v4l2_slice_params_[kMaxSlices];

	144 struct v4l2_ctrl_h264_decode_param v4l2_decode_param_;

	145

	146 DISALLOW_COPY_AND_ASSIGN(V4L2H264Accelerator);

	147 };

	148

	// VP8Decoder::VP8Accelerator implementation used by
	// V4L2SliceVideoDecodeAccelerator. Holds a non-owning pointer to a
	// V4L2SliceVideoDecodeAccelerator (v4l2_dec_).
	149 class V4L2VP8Accelerator : public VP8Decoder::VP8Accelerator {

	150 public:

	// |v4l2_dec| is stored as a raw pointer. The constructor is explicit so
	// that a raw decoder pointer is never implicitly converted into an
	// accelerator.
	151 explicit V4L2VP8Accelerator(V4L2SliceVideoDecodeAccelerator* v4l2_dec);

	152 virtual ~V4L2VP8Accelerator() {}

	153

	154 // VP8Decoder::VP8Accelerator implementation.

	155 scoped_refptr<VP8Picture> CreateVP8Picture() override;

	156

	157 bool SubmitDecode(const scoped_refptr<VP8Picture>& pic,

	158 const media::Vp8FrameHeader* frame_hdr,

	159 const scoped_refptr<VP8Picture>& last_frame,

	160 const scoped_refptr<VP8Picture>& golden_frame,

	161 const scoped_refptr<VP8Picture>& alt_frame) override;

	162

	163 bool OutputPicture(const scoped_refptr<VP8Picture>& pic) override;

	164

	165 private:

	// Returns the V4L2DecodeSurface associated with |pic|.
	// NOTE(review): behavior for a picture that is not a V4L2VP8Picture is
	// not visible here -- confirm against the .cc file.
	166 scoped_refptr<V4L2DecodeSurface> VP8PictureToV4L2DecodeSurface(

	167 const scoped_refptr<VP8Picture>& pic);

	168

	// Not owned.
	169 V4L2SliceVideoDecodeAccelerator* v4l2_dec_;

	170

	171 DISALLOW_COPY_AND_ASSIGN(V4L2VP8Accelerator);

	172 };

	173

	174 // Record for input buffers.

	// Per-input-buffer bookkeeping; instances are stored in input_buffer_map_.
	175 struct InputRecord {

	176 InputRecord();

	// NOTE(review): presumably the id of the bitstream buffer currently held
	// in this input buffer -- confirm.
	177 int32 input_id;

	// NOTE(review): presumably the mapped address and size of the buffer --
	// confirm against CreateInputBuffers() in the .cc file.
	178 void* address;

	179 size_t length;

	// NOTE(review): presumably the number of bytes of |length| filled so far
	// -- confirm.
	180 size_t bytes_used;

	// NOTE(review): presumably true while the buffer is queued to the device
	// -- confirm.
	181 bool at_device;

	182 };

	183

	184 // Record for output buffers.

	// Per-output-buffer bookkeeping; instances are stored in
	// output_buffer_map_.
	185 struct OutputRecord {

	186 OutputRecord();

	// NOTE(review): presumably true while the buffer is queued to the device
	// / held by the client, respectively -- confirm.
	187 bool at_device;

	188 bool at_client;

	// NOTE(review): presumably the id of the client PictureBuffer backing
	// this output buffer -- confirm.
	189 int32 picture_id;

	// EGL image and sync object associated with this output buffer.
	190 EGLImageKHR egl_image;

	191 EGLSyncKHR egl_sync;

	// NOTE(review): presumably whether the picture has been cleared (see
	// PictureCleared()) -- confirm.
	192 bool cleared;

	193 };

	194

	// Input buffer sizing constants.
	195 enum {

	196 // See http://crbug.com/255116.

	197 // Input bitstream buffer size for up to 1080p streams.

	// Evaluates to 1048576. NOTE(review): presumably in bytes -- confirm.
	198 kInputBufferMaxSizeFor1080p = 1024 * 1024,

	199 // Input bitstream buffer size for up to 4k streams.

	// Four times the 1080p size.
	200 kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p,

	// NOTE(review): presumably the number of input buffers requested by
	// CreateInputBuffers() -- confirm.
	201 kNumInputBuffers = 16,

	202 };

	203

	204 // Recycle V4L2 output buffer with \|index\|. Used as surface release callback.

	205 void ReuseOutputBuffer(int index);

	206

	207 // Queue a \|dec_surface\| to device for decoding.

	208 void Enqueue(const scoped_refptr<V4L2DecodeSurface>& dec_surface);

	209

	210 // Dequeue any V4L2 buffers available and process.

	211 void Dequeue();

	212

	213 // V4L2 QBUF helpers.

	214 bool EnqueueInputRecord(int index, uint32_t config_store);

	215 bool EnqueueOutputRecord(int index);

	216

	217 // Set input and output formats in hardware.

	218 bool SetupFormats();

	219

	220 // Create input and output buffers.

	221 bool CreateInputBuffers();

	222 bool CreateOutputBuffers();

	223

	224 // Destroy input buffers.

	225 void DestroyInputBuffers();

	226

	227 // Destroy output buffers and release associated resources (textures,

	228 // EGLImages). If \|dismiss\| is true, also dismiss the associated

	229 // PictureBuffers.

	230 bool DestroyOutputs(bool dismiss);

	231

	232 // Used by DestroyOutputs.

	233 bool DestroyOutputBuffers();

	234

	235 // Dismiss all \|picture_buffer_ids\| via Client::DismissPictureBuffer()

	236 // and signal \|done\| after finishing.

	237 void DismissPictures(std::vector<int32> picture_buffer_ids,

	238 base::WaitableEvent* done);

	239

	240 // Task to finish initialization on decoder_thread_.

	241 void InitializeTask();

	242

	243 // Surface set change (resolution change) flow.

	244 // If we have no surfaces allocated, just allocate them and return.

	245 // Otherwise mark us as pending for surface set change.

	246 void InitiateSurfaceSetChange();

	247 // If a surface set change is pending and we are ready, stop the device,

	248 // destroy outputs, releasing resources and dismissing pictures as required,

	249 // followed by allocating a new set for the new resolution/DPB size

	250 // as provided by decoder. Finally, try to resume decoding.

	251 void FinishSurfaceSetChangeIfNeeded();

	252

	253 void NotifyError(Error error);

	254 void DestroyTask();

	255

	256 // Sets the state to kError and notifies client if needed.

	257 void SetErrorState(Error error);

	258

	259 // Flush flow when requested by client.

	260 // When Flush() is called, it posts a FlushTask, which checks the input queue.

	261 // If nothing is pending for decode on decoder_input_queue_, we call

	262 // InitiateFlush() directly. Otherwise, we push a dummy BitstreamBufferRef

	263 // onto the decoder_input_queue_ to schedule a flush. When we reach it later

	264 // on, we call InitiateFlush() to perform it at the correct time.

	265 void FlushTask();

	266 // Tell the decoder to flush all frames, reset it and mark us as scheduled

	267 // for flush, so that we can finish it once all pending decodes are finished.

	268 void InitiateFlush();

	269 // If all pending frames are decoded and we are waiting to flush, perform it.

	270 // This will send all pending pictures to the client, notify the client that

	271 // the flush is complete and put us in a state ready to resume.

	272 void FinishFlushIfNeeded();

	273

	274 // Reset flow when requested by client.

	275 // Drop all inputs and reset the decoder and mark us as pending for reset.

	276 void ResetTask();

	277 // If all pending frames are decoded and we are waiting to reset, perform it.

	278 // This drops all pending outputs (client is not interested anymore),

	279 // notifies the client we are done and puts us in a state ready to resume.

	280 void FinishResetIfNeeded();

	281

	282 // Process pending events if any.

	283 void ProcessPendingEventsIfNeeded();

	284

	285 // Performed on decoder_thread_ as a consequence of poll() on

	286 // device_poll_thread_ returning an event.

	287 void ServiceDeviceTask();

	288

	289 // Schedule poll if we have any buffers queued and the poll thread

	290 // is not stopped (on surface set change).

	291 void SchedulePollIfNeeded();

	292

	293 // Attempt to start/stop device_poll_thread_.

	294 bool StartDevicePoll();

	295 bool StopDevicePoll(bool keep_input_state);

	296

	297 // Run on device_poll_thread_ to wait for device events.

	298 void DevicePollTask(bool poll_device);

	299

	// Decoder state; the current value is kept in state_.
	300 enum State {

	301 // We are in this state until Initialize() returns successfully.

	302 // We can't post errors to the client in this state yet.

	303 kUninitialized,

	304 // Initialize() returned successfully.

	305 kInitialized,

	306 // In this state we can make progress decoding the input stream.

	307 kDecoding,

	308 // Transitional state in which we are no longer decoding the stream, but

	309 // are performing a flush, reset or resolution change, or are destroying ourselves.

	310 kIdle,

	311 // Error state, set when sending NotifyError to client.

	312 kError,

	313 };

	314

	// Reserved (negative) bitstream buffer ids used internally.
	315 enum BufferId {

	316 kFlushBufferId = -2 // Id of the dummy flush buffer queued by FlushTask().

	317 };

	318

	319 void DecodeTask(const media::BitstreamBuffer& bitstream_buffer);

	320 void DecodeBufferTask();

	321 void ScheduleDecodeBufferTaskIfNeeded();

	322 bool TrySetNewBistreamBuffer();

	323

	324 // Auto-destruction reference for EGLSync (for message-passing).

	325 struct EGLSyncKHRRef;

	326 void ReusePictureBufferTask(int32 picture_buffer_id,

	327 scoped_ptr<EGLSyncKHRRef> egl_sync_ref);

	328

	329 // Called by accelerator implementations:

	330 // Decode of \|dec_surface\| is ready to be submitted and all codec-specific

	331 // settings are set in hardware.

	332 void DecodeSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);

	333

	334 // \|dec_surface\| is ready to be output once decode is finished.

	335 // This can be called before decode is actually done in hardware, and this

	336 // method is responsible for maintaining the order, i.e. the surfaces will

	337 // be output in the same order as the SurfaceReady() calls. To do so, the

	338 // surfaces are put on decoder_display_queue_ and sent to output in that

	339 // order once all preceding surfaces are sent.

	340 void SurfaceReady(const scoped_refptr<V4L2DecodeSurface>& dec_surface);

	341

	342 // Called to actually send \|dec_surface\| to the client, after it is decoded

	343 // preserving the order in which it was scheduled via SurfaceReady().

	344 void OutputSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);

	345

	346 // Goes over the \|decoder_display_queue_\| and sends all buffers from the

	347 // front of the queue that are already decoded to the client, in order.

	348 void TryOutputSurfaces();

	349

	350 // Creates a new decode surface or returns nullptr if one is not available.

	351 scoped_refptr<V4L2DecodeSurface> CreateSurface();

	352

	353 // Send decoded pictures to PictureReady.

	354 void SendPictureReady();

	355

	356 // Callback that indicates a picture has been cleared.

	357 void PictureCleared();

	358

	359 size_t input_planes_count_;

	360 size_t output_planes_count_;

	361

	362 // GPU Child thread message loop.

	363 const scoped_refptr<base::MessageLoopProxy> child_message_loop_proxy_;

	364

	365 // IO thread message loop.

	366 scoped_refptr<base::MessageLoopProxy> io_message_loop_proxy_;

	367

	368 // WeakPtr<> pointing to \|this\| for use in posting tasks from the decoder or

	369 // device worker threads back to the child thread.

	370 base::WeakPtr<V4L2SliceVideoDecodeAccelerator> weak_this_;

	371

	372 // To expose client callbacks from VideoDecodeAccelerator.

	373 // NOTE: all calls to these objects MUST be executed on

	374 // child_message_loop_proxy_.

	375 scoped_ptr<base::WeakPtrFactory<VideoDecodeAccelerator::Client>>

	376 client_ptr_factory_;

	377 base::WeakPtr<VideoDecodeAccelerator::Client> client_;

	378 // Callbacks to \|io_client_\| must be executed on \|io_message_loop_proxy_\|.

	379 base::WeakPtr<Client> io_client_;

	380

	381 // V4L2 device in use.

	382 scoped_refptr<V4L2Device> device_;

	383

	384 // Thread to communicate with the device on.

	385 base::Thread decoder_thread_;

	386 scoped_refptr<base::MessageLoopProxy> decoder_thread_proxy_;

	387

	388 // Thread used to poll the device for events.

	389 base::Thread device_poll_thread_;

	390

	391 // Input queue state.

	392 bool input_streamon_;

	393 // Number of input buffers enqueued to the device.

	394 int input_buffer_queued_count_;

	395 // Input buffers ready to use; LIFO since we don't care about ordering.

	396 std::list<int> free_input_buffers_;

	397 // Mapping of int index to an input buffer record.

	398 std::vector<InputRecord> input_buffer_map_;

	399

	400 // Output queue state.

	401 bool output_streamon_;

	402 // Number of output buffers enqueued to the device.

	403 int output_buffer_queued_count_;

	404 // Output buffers ready to use.

	405 std::list<int> free_output_buffers_;

	406 // Mapping of int index to an output buffer record.

	407 std::vector<OutputRecord> output_buffer_map_;

	408

	409 media::VideoCodecProfile video_profile_;

	410 uint32_t output_format_fourcc_;

	411 gfx::Size frame_buffer_size_;

	412 size_t output_dpb_size_;

	413

	414 struct BitstreamBufferRef;

	415 // Input queue of stream buffers coming from the client.

	416 std::queue<linked_ptr<BitstreamBufferRef>> decoder_input_queue_;

	417 // BitstreamBuffer currently being processed.

	418 scoped_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;

	419

	420 // Queue storing decode surfaces ready to be output as soon as they are

	421 // decoded. The surfaces must be output in order they are queued.

	422 std::queue<scoped_refptr<V4L2DecodeSurface>> decoder_display_queue_;

	423

	424 // Decoder state.

	425 State state_;

	426

	427 // If any of these are true, we are waiting for the device to finish decoding

	428 // all previously-queued frames, so we can finish the flush/reset/surface

	429 // change flows. These can stack.

	430 bool decoder_flushing_;

	431 bool decoder_resetting_;

	432 bool surface_set_change_pending_;

	433

	434 // Hardware accelerators.

	435 // TODO(posciak): Try to have a superclass here if possible.

	436 scoped_ptr<V4L2H264Accelerator> h264_accelerator_;

	437 scoped_ptr<V4L2VP8Accelerator> vp8_accelerator_;

	438

	439 // Codec-specific software decoder in use.

	440 scoped_ptr<AcceleratedVideoDecoder> decoder_;

	441

	442 // Surfaces queued to device to keep references to them while decoded.

	443 using V4L2DecodeSurfaceByOutputId =

	444 std::map<int, scoped_refptr<V4L2DecodeSurface>>;

	445 V4L2DecodeSurfaceByOutputId surfaces_at_device_;

	446

	447 // Surfaces sent to client to keep references to them while displayed.

	448 using V4L2DecodeSurfaceByPictureBufferId =

	449 std::map<int32, scoped_refptr<V4L2DecodeSurface>>;

	450 V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_;

	451

	452 // Record for decoded pictures that can be sent to PictureReady.

	453 struct PictureRecord;

	454 // Pictures that are ready but not sent to PictureReady yet.

	455 std::queue<PictureRecord> pending_picture_ready_;

	456

	457 // The number of pictures that are sent to PictureReady and will be cleared.

	458 int picture_clearing_count_;

	459

	460 // Used by the decoder thread to wait for AssignPictureBuffers to arrive

	461 // to avoid races with potential Reset requests.

	462 base::WaitableEvent pictures_assigned_;

	463

	464 // Make the GL context current callback.

	465 base::Callback<bool(void)> make_context_current_;

	466

	467 // EGL state

	468 EGLDisplay egl_display_;

	469 EGLContext egl_context_;

	470

	471 // The WeakPtrFactory for \|weak_this_\|.

	472 base::WeakPtrFactory<V4L2SliceVideoDecodeAccelerator> weak_this_factory_;

	473

	474 DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator);

	475 };

	476

	477 // Codec-specific subclasses of software decoder picture classes.

	478 // This allows us to keep decoders oblivious of our implementation details.

	// H264Picture subclass that additionally holds a reference to the
	// V4L2DecodeSurface passed to its constructor.
	479 class V4L2H264Picture : public H264Picture {

	480 public:

	// Takes a reference to |dec_surface|. Explicit so that a surface is never
	// implicitly converted into a picture.
	481 explicit V4L2H264Picture(const scoped_refptr<

	482 V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>& dec_surface)

	483 : dec_surface_(dec_surface) {}

	484 virtual ~V4L2H264Picture() {}

	485

	// Returns this object, typed as V4L2H264Picture.
	486 V4L2H264Picture* AsV4L2H264Picture() override { return this; }

	// Returns a new reference to the surface given at construction.
	487 scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>

	488 dec_surface() {

	489 return dec_surface_;

	490 }

	491

	492 private:

	493 scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>

	494 dec_surface_;

	495

	496 DISALLOW_COPY_AND_ASSIGN(V4L2H264Picture);

	497 };

	498

	// VP8Picture subclass that additionally holds a reference to the
	// V4L2DecodeSurface passed to its constructor.
	499 class V4L2VP8Picture : public VP8Picture {

	500 public:

	// Takes a reference to |dec_surface|. Explicit so that a surface is never
	// implicitly converted into a picture.
	501 explicit V4L2VP8Picture(const scoped_refptr<

	502 V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>& dec_surface)

	503 : dec_surface_(dec_surface) {}

	504 virtual ~V4L2VP8Picture() {}

	505

	// Returns this object, typed as V4L2VP8Picture.
	506 V4L2VP8Picture* AsV4L2VP8Picture() override { return this; }

	// Returns a new reference to the surface given at construction.
	507 scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>

	508 dec_surface() {

	509 return dec_surface_;

	510 }

	511

	512 private:

	513 scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>

	514 dec_surface_;

	515

	516 DISALLOW_COPY_AND_ASSIGN(V4L2VP8Picture);

	517 };

	518

	519 } // namespace content

	520

	521 #endif // CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_

OLD	NEW