Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(289)

Side by Side Diff: content/common/gpu/media/v4l2_slice_video_decode_accelerator.h

Issue 833063003: Add accelerated video decoder interface, VP8 and H.264 implementations and hook up to V4L2SVDA. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
6 #define CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
7
#include <linux/videodev2.h>

#include <list>
#include <map>
#include <queue>
#include <string>
#include <vector>

#include "base/memory/linked_ptr.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/thread.h"
#include "content/common/content_export.h"
#include "content/common/gpu/media/h264_decoder.h"
#include "content/common/gpu/media/v4l2_video_device.h"
#include "content/common/gpu/media/vp8_decoder.h"
#include "media/video/video_decode_accelerator.h"
23
24 namespace content {
25
26 // An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice
27 // level codec API for decoding. The slice level API provides only a low-level
28 // decoding functionality and requires userspace to provide support for parsing
29 // the input stream and managing decoder state across frames.
30 class CONTENT_EXPORT V4L2SliceVideoDecodeAccelerator
31 : public media::VideoDecodeAccelerator {
32 public:
33 class V4L2DecodeSurface : public base::RefCounted<V4L2DecodeSurface> {
34 public:
35 using ReleaseCB = base::Callback<void(int)>;
36
37 V4L2DecodeSurface(int32 bitstream_id,
38 int input_record,
39 int output_record,
40 const ReleaseCB& release_cb);
41 virtual ~V4L2DecodeSurface();
42
43 // Mark the surface as decoded. This will also release all references, as
44 // they are not needed anymore.
45 void SetDecoded();
46 bool decoded() const { return decoded_; }
47
48 int32 bitstream_id() const { return bitstream_id_; }
49 int input_record() const { return input_record_; }
50 int output_record() const { return output_record_; }
51 uint32_t config_store() const { return config_store_; }
52
53 // Take references to each reference surface and keep them until the
54 // target surface is decoded.
55 void SetReferenceSurfaces(
56 const std::vector<scoped_refptr<V4L2DecodeSurface>>& ref_surfaces);
57
58 std::string ToString() const;
59
60 private:
61 int32 bitstream_id_;
62 int input_record_;
63 int output_record_;
64 uint32_t config_store_;
65
66 bool decoded_;
67 ReleaseCB release_cb_;
68
69 std::vector<scoped_refptr<V4L2DecodeSurface>> reference_surfaces_;
70
71 DISALLOW_COPY_AND_ASSIGN(V4L2DecodeSurface);
72 };
73
74 V4L2SliceVideoDecodeAccelerator(
75 const scoped_refptr<V4L2Device>& device,
76 EGLDisplay egl_display,
77 EGLContext egl_context,
78 const base::WeakPtr<Client>& io_client_,
79 const base::Callback<bool(void)>& make_context_current,
80 const scoped_refptr<base::MessageLoopProxy>& io_message_loop_proxy);
81 virtual ~V4L2SliceVideoDecodeAccelerator();
82
83 // media::VideoDecodeAccelerator implementation.
84 virtual bool Initialize(media::VideoCodecProfile profile,
85 VideoDecodeAccelerator::Client* client) override;
86 virtual void Decode(const media::BitstreamBuffer& bitstream_buffer) override;
87 virtual void AssignPictureBuffers(
88 const std::vector<media::PictureBuffer>& buffers) override;
89 virtual void ReusePictureBuffer(int32 picture_buffer_id) override;
90 virtual void Flush() override;
91 virtual void Reset() override;
92 virtual void Destroy() override;
93 virtual bool CanDecodeOnIOThread() override;
94
95 bool SubmitSlice(int index, const uint8_t* data, size_t size);
96 bool SubmitExtControls(struct v4l2_ext_controls* ext_ctrls);
97
98 private:
99 class V4L2H264Accelerator : public H264Decoder::H264Accelerator {
100 public:
101 V4L2H264Accelerator(V4L2SliceVideoDecodeAccelerator* v4l2_dec);
102 virtual ~V4L2H264Accelerator() {}
103
104 // H264Decoder::H264Accelerator implementation.
105 scoped_refptr<H264Picture> CreateH264Picture() override;
106
107 bool SubmitFrameMetadata(const media::H264SPS* sps,
108 const media::H264PPS* pps,
109 const H264DPB& dpb,
110 const H264Picture::Vector& ref_pic_listp0,
111 const H264Picture::Vector& ref_pic_listb0,
112 const H264Picture::Vector& ref_pic_listb1,
113 const scoped_refptr<H264Picture>& pic) override;
114
115 bool SubmitSlice(const media::H264PPS* pps,
116 const media::H264SliceHeader* slice_hdr,
117 const H264Picture::Vector& ref_pic_list0,
118 const H264Picture::Vector& ref_pic_list1,
119 const scoped_refptr<H264Picture>& pic,
120 const uint8_t* data,
121 size_t size) override;
122
123 bool SubmitDecode(const scoped_refptr<H264Picture>& pic) override;
124 bool OutputPicture(const scoped_refptr<H264Picture>& pic) override;
125
126 private:
127 void H264PictureListToDPBIndicesList(
128 const H264Picture::Vector& src_pic_list,
129 uint8_t dst_list[32]);
130
131 void H264DPBToV4L2DPB(
132 const H264DPB& dpb,
133 std::vector<scoped_refptr<V4L2DecodeSurface>>* ref_surfaces);
134
135 scoped_refptr<V4L2DecodeSurface> H264PictureToV4L2DecodeSurface(
136 const scoped_refptr<H264Picture>& pic);
137
138 size_t num_slices_;
139 V4L2SliceVideoDecodeAccelerator* v4l2_dec_;
140
141 // TODO(posciak): This should be queried from hardware once supported.
142 static const size_t kMaxSlices = 16;
143 struct v4l2_ctrl_h264_slice_param v4l2_slice_params_[kMaxSlices];
144 struct v4l2_ctrl_h264_decode_param v4l2_decode_param_;
145
146 DISALLOW_COPY_AND_ASSIGN(V4L2H264Accelerator);
147 };
148
149 class V4L2VP8Accelerator : public VP8Decoder::VP8Accelerator {
150 public:
151 V4L2VP8Accelerator(V4L2SliceVideoDecodeAccelerator* v4l2_dec);
152 virtual ~V4L2VP8Accelerator() {}
153
154 // H264Decoder::VP8Accelerator implementation.
155 scoped_refptr<VP8Picture> CreateVP8Picture() override;
156
157 bool SubmitDecode(const scoped_refptr<VP8Picture>& pic,
158 const media::Vp8FrameHeader* frame_hdr,
159 const scoped_refptr<VP8Picture>& last_frame,
160 const scoped_refptr<VP8Picture>& golden_frame,
161 const scoped_refptr<VP8Picture>& alt_frame) override;
162
163 bool OutputPicture(const scoped_refptr<VP8Picture>& pic) override;
164
165 private:
166 scoped_refptr<V4L2DecodeSurface> VP8PictureToV4L2DecodeSurface(
167 const scoped_refptr<VP8Picture>& pic);
168
169 V4L2SliceVideoDecodeAccelerator* v4l2_dec_;
170
171 DISALLOW_COPY_AND_ASSIGN(V4L2VP8Accelerator);
172 };
173
174 // Record for input buffers.
175 struct InputRecord {
176 InputRecord();
177 int32 input_id;
178 void* address;
179 size_t length;
180 size_t bytes_used;
181 bool at_device;
182 };
183
184 // Record for output buffers.
185 struct OutputRecord {
186 OutputRecord();
187 bool at_device;
188 bool at_client;
189 int32 picture_id;
190 EGLImageKHR egl_image;
191 EGLSyncKHR egl_sync;
192 bool cleared;
193 };
194
// Input buffer sizing constants. See http://crbug.com/255116.
enum {
  // Size of an input bitstream buffer for streams of up to 1080p (1 MiB).
  kInputBufferMaxSizeFor1080p = 1024 * 1024,
  // Size of an input bitstream buffer for streams of up to 4k: four times
  // the 1080p size.
  kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p,
  // Value of 16; named as the number of input buffers.
  kNumInputBuffers = 16,
};
203
204 // Recycle V4L2 output buffer with |index|. Used as surface release callback.
205 void ReuseOutputBuffer(int index);
206
207 // Queue a |dec_surface| to device for decoding.
208 void Enqueue(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
209
210 // Dequeue any V4L2 buffers available and process.
211 void Dequeue();
212
213 // V4L2 QBUF helpers.
214 bool EnqueueInputRecord(int index, uint32_t config_store);
215 bool EnqueueOutputRecord(int index);
216
217 // Set input and output formats in hardware.
218 bool SetupFormats();
219
220 // Create input and output buffers.
221 bool CreateInputBuffers();
222 bool CreateOutputBuffers();
223
224 // Destroy input buffers.
225 void DestroyInputBuffers();
226
227 // Destroy output buffers and release associated resources (textures,
228 // EGLImages). If |dismiss| is true, also dismissing the associated
229 // PictureBuffers.
230 bool DestroyOutputs(bool dismiss);
231
232 // Used by DestroyOutputs.
233 bool DestroyOutputBuffers();
234
235 // Dismiss all |picture_buffer_ids| via Client::DismissPictureBuffer()
236 // and signal |done| after finishing.
237 void DismissPictures(std::vector<int32> picture_buffer_ids,
238 base::WaitableEvent* done);
239
240 // Task to finish initialization on decoder_thread_.
241 void InitializeTask();
242
243 // Surface set change (resolution change) flow.
244 // If we have no surfaces allocated, just allocate them and return.
245 // Otherwise mark us as pending for surface set change.
246 void InitiateSurfaceSetChange();
247 // If a surface set change is pending and we are ready, stop the device,
248 // destroy outputs, releasing resources and dismissing pictures as required,
249 // followed by allocating a new set for the new resolution/DPB size
250 // as provided by decoder. Finally, try to resume decoding.
251 void FinishSurfaceSetChangeIfNeeded();
252
253 void NotifyError(Error error);
254 void DestroyTask();
255
256 // Sets the state to kError and notifies client if needed.
257 void SetErrorState(Error error);
258
259 // Flush flow when requested by client.
260 // When Flush() is called, it posts a FlushTask, which checks the input queue.
261 // If nothing is pending for decode on decoder_input_queue_, we call
262 // InitiateFlush() directly. Otherwise, we push a dummy BitstreamBufferRef
263 // onto the decoder_input_queue_ to schedule a flush. When we reach it later
264 // on, we call InitiateFlush() to perform it at the correct time.
265 void FlushTask();
266 // Tell the decoder to flush all frames, reset it and mark us as scheduled
267 // for flush, so that we can finish it once all pending decodes are finished.
268 void InitiateFlush();
269 // If all pending frames are decoded and we are waiting to flush, perform it.
270 // This will send all pending pictures to client and notify the client that
271 // flush is complete and puts us in a state ready to resume.
272 void FinishFlushIfNeeded();
273
274 // Reset flow when requested by client.
275 // Drop all inputs and reset the decoder and mark us as pending for reset.
276 void ResetTask();
277 // If all pending frames are decoded and we are waiting to reset, perform it.
278 // This drops all pending outputs (client is not interested anymore),
279 // notifies the client we are done and puts us in a state ready to resume.
280 void FinishResetIfNeeded();
281
282 // Process pending events if any.
283 void ProcessPendingEventsIfNeeded();
284
// Runs on decoder_thread_ in response to poll() returning an event.
void ServiceDeviceTask();

// Schedules a poll, provided some buffers are queued and the poll thread has
// not been stopped (which happens on surface set change).
void SchedulePollIfNeeded();

// Try to start, respectively stop, device_poll_thread_.
// NOTE(review): |keep_input_state| is not documented here; confirm its exact
// effect against the implementation file.
bool StartDevicePoll();
bool StopDevicePoll(bool keep_input_state);

// Runs on device_poll_thread_ and waits for device events.
// NOTE(review): |poll_device| is not documented here; confirm its meaning
// against the implementation file.
void DevicePollTask(bool poll_device);
299
// States the accelerator can be in.
enum State {
  // Held until Initialize() has returned successfully. Errors cannot be
  // posted to the client yet while in this state.
  kUninitialized,
  // Initialize() has returned successfully.
  kInitialized,
  // More of the input stream may be decoded while in this state.
  kDecoding,
  // Transitional state: no further stream is being decoded because a flush,
  // reset or resolution change is in progress, or we are being destroyed.
  kIdle,
  // Error state; entered when NotifyError is sent to the client.
  kError,
};
314
// Reserved bitstream buffer ids.
enum BufferId {
  // Id carried by the flush buffer that FlushTask() queues.
  kFlushBufferId = -2
};
318
319 void DecodeTask(const media::BitstreamBuffer& bitstream_buffer);
320 void DecodeBufferTask();
321 void ScheduleDecodeBufferTaskIfNeeded();
322 bool TrySetNewBistreamBuffer();
323
324 // Auto-destruction reference for EGLSync (for message-passing).
325 struct EGLSyncKHRRef;
326 void ReusePictureBufferTask(int32 picture_buffer_id,
327 scoped_ptr<EGLSyncKHRRef> egl_sync_ref);
328
329 // Called by accelerator implementations:
330 // Decode of |dec_surface| is ready to be submitted and all codec-specific
331 // settings are set in hardware.
332 void DecodeSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
333
334 // |dec_surface| is ready to be outputted once decode is finished.
335 // This can be called before decode is actually done in hardware, and this
336 // method is responsible to maintain the order, i.e. the surfaces will
337 // be outputted in the same order as SurfaceReady calls. To do so, the
338 // surfaces are put on decoder_display_queue_ and sent to output in that
339 // order once all preceding surfaces are sent.
340 void SurfaceReady(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
341
342 // Called to actually send |dec_surface| to the client, after it is decoded
343 // preserving the order in which it was scheduled via SurfaceReady().
344 void OutputSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
345
346 // Goes over the |decoder_display_queue_| and sends all buffers from the
347 // front of the queue that are already decoded to the client, in order.
348 void TryOutputSurfaces();
349
350 // Creates a new decode surface or returns nullptr if one is not available.
351 scoped_refptr<V4L2DecodeSurface> CreateSurface();
352
353 // Send decoded pictures to PictureReady.
354 void SendPictureReady();
355
356 // Callback that indicates a picture has been cleared.
357 void PictureCleared();
358
359 size_t input_planes_count_;
360 size_t output_planes_count_;
361
362 // GPU Child thread message loop.
363 const scoped_refptr<base::MessageLoopProxy> child_message_loop_proxy_;
364
365 // IO thread message loop.
366 scoped_refptr<base::MessageLoopProxy> io_message_loop_proxy_;
367
368 // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
369 // device worker threads back to the child thread.
370 base::WeakPtr<V4L2SliceVideoDecodeAccelerator> weak_this_;
371
372 // To expose client callbacks from VideoDecodeAccelerator.
373 // NOTE: all calls to these objects *MUST* be executed on
374 // child_message_loop_proxy_.
375 scoped_ptr<base::WeakPtrFactory<VideoDecodeAccelerator::Client>>
376 client_ptr_factory_;
377 base::WeakPtr<VideoDecodeAccelerator::Client> client_;
378 // Callbacks to |io_client_| must be executed on |io_message_loop_proxy_|.
379 base::WeakPtr<Client> io_client_;
380
381 // V4L2 device in use.
382 scoped_refptr<V4L2Device> device_;
383
384 // Thread to communicate with the device on.
385 base::Thread decoder_thread_;
386 scoped_refptr<base::MessageLoopProxy> decoder_thread_proxy_;
387
388 // Thread used to poll the device for events.
389 base::Thread device_poll_thread_;
390
391 // Input queue state.
392 bool input_streamon_;
393 // Number of input buffers enqueued to the device.
394 int input_buffer_queued_count_;
395 // Input buffers ready to use; LIFO since we don't care about ordering.
396 std::list<int> free_input_buffers_;
397 // Mapping of int index to an input buffer record.
398 std::vector<InputRecord> input_buffer_map_;
399
400 // Output queue state.
401 bool output_streamon_;
402 // Number of output buffers enqueued to the device.
403 int output_buffer_queued_count_;
404 // Output buffers ready to use.
405 std::list<int> free_output_buffers_;
406 // Mapping of int index to an output buffer record.
407 std::vector<OutputRecord> output_buffer_map_;
408
409 media::VideoCodecProfile video_profile_;
410 uint32_t output_format_fourcc_;
411 gfx::Size frame_buffer_size_;
412 size_t output_dpb_size_;
413
414 struct BitstreamBufferRef;
415 // Input queue of stream buffers coming from the client.
416 std::queue<linked_ptr<BitstreamBufferRef>> decoder_input_queue_;
417 // BitstreamBuffer currently being processed.
418 scoped_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;
419
420 // Queue storing decode surfaces ready to be output as soon as they are
421 // decoded. The surfaces must be output in order they are queued.
422 std::queue<scoped_refptr<V4L2DecodeSurface>> decoder_display_queue_;
423
424 // Decoder state.
425 State state_;
426
427 // If any of these are true, we are waiting for the device to finish decoding
428 // all previously-queued frames, so we can finish the flush/reset/surface
429 // change flows. These can stack.
430 bool decoder_flushing_;
431 bool decoder_resetting_;
432 bool surface_set_change_pending_;
433
434 // Hardware accelerators.
435 // TODO(posciak): Try to have a superclass here if possible.
436 scoped_ptr<V4L2H264Accelerator> h264_accelerator_;
437 scoped_ptr<V4L2VP8Accelerator> vp8_accelerator_;
438
439 // Codec-specific software decoder in use.
440 scoped_ptr<AcceleratedVideoDecoder> decoder_;
441
442 // Surfaces queued to device to keep references to them while decoded.
443 using V4L2DecodeSurfaceByOutputId =
444 std::map<int, scoped_refptr<V4L2DecodeSurface>>;
445 V4L2DecodeSurfaceByOutputId surfaces_at_device_;
446
447 // Surfaces sent to client to keep references to them while displayed.
448 using V4L2DecodeSurfaceByPictureBufferId =
449 std::map<int32, scoped_refptr<V4L2DecodeSurface>>;
450 V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_;
451
452 // Record for decoded pictures that can be sent to PictureReady.
453 struct PictureRecord;
454 // Pictures that are ready but not sent to PictureReady yet.
455 std::queue<PictureRecord> pending_picture_ready_;
456
457 // The number of pictures that are sent to PictureReady and will be cleared.
458 int picture_clearing_count_;
459
460 // Used by the decoder thread to wait for AssignPictureBuffers to arrive
461 // to avoid races with potential Reset requests.
462 base::WaitableEvent pictures_assigned_;
463
464 // Make the GL context current callback.
465 base::Callback<bool(void)> make_context_current_;
466
467 // EGL state
468 EGLDisplay egl_display_;
469 EGLContext egl_context_;
470
471 // The WeakPtrFactory for |weak_this_|.
472 base::WeakPtrFactory<V4L2SliceVideoDecodeAccelerator> weak_this_factory_;
473
474 DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator);
475 };
476
477 // Codec-specific subclasses of software decoder picture classes.
478 // This allows us to keep decoders oblivious of our implementation details.
479 class V4L2H264Picture : public H264Picture {
480 public:
481 V4L2H264Picture(const scoped_refptr<
482 V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>& dec_surface)
483 : dec_surface_(dec_surface) {}
484 virtual ~V4L2H264Picture() {}
485
486 V4L2H264Picture* AsV4L2H264Picture() override { return this; }
487 scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>
488 dec_surface() {
489 return dec_surface_;
490 }
491
492 private:
493 scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>
494 dec_surface_;
495
496 DISALLOW_COPY_AND_ASSIGN(V4L2H264Picture);
497 };
498
499 class V4L2VP8Picture : public VP8Picture {
500 public:
501 V4L2VP8Picture(const scoped_refptr<
502 V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>& dec_surface)
503 : dec_surface_(dec_surface) {}
504 virtual ~V4L2VP8Picture() {}
505
506 V4L2VP8Picture* AsV4L2VP8Picture() override { return this; }
507 scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>
508 dec_surface() {
509 return dec_surface_;
510 }
511
512 private:
513 scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>
514 dec_surface_;
515
516 DISALLOW_COPY_AND_ASSIGN(V4L2VP8Picture);
517 };
518
519 } // namespace content
520
521 #endif // CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698