Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(148)

Side by Side Diff: content/common/gpu/media/v4l2_slice_video_decode_accelerator.h

Issue 833063003: Add accelerated video decoder interface, VP8 and H.264 implementations and hook up to V4L2SVDA. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressed all comments. Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
6 #define CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
7
8 #include <linux/videodev2.h>
9 #include <queue>
10 #include <vector>
11
12 #include "base/memory/linked_ptr.h"
13 #include "base/memory/ref_counted.h"
14 #include "base/memory/scoped_ptr.h"
15 #include "base/memory/weak_ptr.h"
16 #include "base/synchronization/waitable_event.h"
17 #include "base/threading/thread.h"
18 #include "content/common/content_export.h"
19 #include "content/common/gpu/media/h264_decoder.h"
20 #include "content/common/gpu/media/v4l2_video_device.h"
21 #include "content/common/gpu/media/vp8_decoder.h"
22 #include "media/video/video_decode_accelerator.h"
23
24 namespace content {
25
26 // An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice
27 // level codec API for decoding. The slice level API provides only a low-level
28 // decoding functionality and requires userspace to provide support for parsing
29 // the input stream and managing decoder state across frames.
30 class CONTENT_EXPORT V4L2SliceVideoDecodeAccelerator
31 : public media::VideoDecodeAccelerator {
32 public:
33 class V4L2DecodeSurface : public base::RefCounted<V4L2DecodeSurface> {
scherkus (not reviewing) 2015/01/13 01:25:00 is it possible to fwd decl this and move to the .c
Pawel Osciak 2015/01/13 11:33:35 Done.
34 public:
35 using ReleaseCB = base::Callback<void(int)>;
36
37 V4L2DecodeSurface(int32 bitstream_id,
38 int input_record,
39 int output_record,
40 const ReleaseCB& release_cb);
41 virtual ~V4L2DecodeSurface();
42
43 // Mark the surface as decoded. This will also release all references, as
44 // they are not needed anymore.
45 void SetDecoded();
46 bool decoded() const { return decoded_; }
47
48 int32 bitstream_id() const { return bitstream_id_; }
49 int input_record() const { return input_record_; }
50 int output_record() const { return output_record_; }
51 uint32_t config_store() const { return config_store_; }
52
53 // Take references to each reference surface and keep them until the
54 // target surface is decoded.
55 void SetReferenceSurfaces(
56 const std::vector<scoped_refptr<V4L2DecodeSurface>>& ref_surfaces);
57
58 std::string ToString() const;
59
60 private:
61 int32 bitstream_id_;
62 int input_record_;
63 int output_record_;
64 uint32_t config_store_;
65
66 bool decoded_;
67 ReleaseCB release_cb_;
68
69 std::vector<scoped_refptr<V4L2DecodeSurface>> reference_surfaces_;
70
71 DISALLOW_COPY_AND_ASSIGN(V4L2DecodeSurface);
72 };
73
74 V4L2SliceVideoDecodeAccelerator(
75 const scoped_refptr<V4L2Device>& device,
76 EGLDisplay egl_display,
77 EGLContext egl_context,
78 const base::WeakPtr<Client>& io_client_,
79 const base::Callback<bool(void)>& make_context_current,
80 const scoped_refptr<base::MessageLoopProxy>& io_message_loop_proxy);
81 virtual ~V4L2SliceVideoDecodeAccelerator();
scherkus (not reviewing) 2015/01/13 01:25:00 override
Pawel Osciak 2015/01/13 11:33:35 Done.
82
83 // media::VideoDecodeAccelerator implementation.
84 virtual bool Initialize(media::VideoCodecProfile profile,
scherkus (not reviewing) 2015/01/13 01:25:00 remove all virtual keywords
Pawel Osciak 2015/01/13 11:33:34 Done.
85 VideoDecodeAccelerator::Client* client) override;
86 virtual void Decode(const media::BitstreamBuffer& bitstream_buffer) override;
87 virtual void AssignPictureBuffers(
88 const std::vector<media::PictureBuffer>& buffers) override;
89 virtual void ReusePictureBuffer(int32 picture_buffer_id) override;
90 virtual void Flush() override;
91 virtual void Reset() override;
92 virtual void Destroy() override;
93 virtual bool CanDecodeOnIOThread() override;
94
95 private:
96 class V4L2H264Accelerator : public H264Decoder::H264Accelerator {
scherkus (not reviewing) 2015/01/13 01:25:00 is it possible to fwd decl this and move to the .c
Pawel Osciak 2015/01/13 11:33:35 Done.
97 public:
98 V4L2H264Accelerator(V4L2SliceVideoDecodeAccelerator* v4l2_dec);
99 virtual ~V4L2H264Accelerator() {}
scherkus (not reviewing) 2015/01/13 01:25:00 override and move impl to .cc
Pawel Osciak 2015/01/13 11:33:35 Done.
100
101 // H264Decoder::H264Accelerator implementation.
102 scoped_refptr<H264Picture> CreateH264Picture() override;
103
104 bool SubmitFrameMetadata(const media::H264SPS* sps,
105 const media::H264PPS* pps,
106 const H264DPB& dpb,
107 const H264Picture::Vector& ref_pic_listp0,
108 const H264Picture::Vector& ref_pic_listb0,
109 const H264Picture::Vector& ref_pic_listb1,
110 const scoped_refptr<H264Picture>& pic) override;
111
112 bool SubmitSlice(const media::H264PPS* pps,
113 const media::H264SliceHeader* slice_hdr,
114 const H264Picture::Vector& ref_pic_list0,
115 const H264Picture::Vector& ref_pic_list1,
116 const scoped_refptr<H264Picture>& pic,
117 const uint8_t* data,
118 size_t size) override;
119
120 bool SubmitDecode(const scoped_refptr<H264Picture>& pic) override;
121 bool OutputPicture(const scoped_refptr<H264Picture>& pic) override;
122
123 private:
124 // Max size of reference list.
125 static const size_t kDPBIndicesListSize = 32;
126 void H264PictureListToDPBIndicesList(
127 const H264Picture::Vector& src_pic_list,
128 uint8_t dst_list[kDPBIndicesListSize]);
129
130 void H264DPBToV4L2DPB(
131 const H264DPB& dpb,
132 std::vector<scoped_refptr<V4L2DecodeSurface>>* ref_surfaces);
133
134 scoped_refptr<V4L2DecodeSurface> H264PictureToV4L2DecodeSurface(
135 const scoped_refptr<H264Picture>& pic);
136
137 size_t num_slices_;
138 V4L2SliceVideoDecodeAccelerator* v4l2_dec_;
139
140 // TODO(posciak): This should be queried from hardware once supported.
141 static const size_t kMaxSlices = 16;
142 struct v4l2_ctrl_h264_slice_param v4l2_slice_params_[kMaxSlices];
143 struct v4l2_ctrl_h264_decode_param v4l2_decode_param_;
144
145 DISALLOW_COPY_AND_ASSIGN(V4L2H264Accelerator);
146 };
147
148 class V4L2VP8Accelerator : public VP8Decoder::VP8Accelerator {
149 public:
150 V4L2VP8Accelerator(V4L2SliceVideoDecodeAccelerator* v4l2_dec);
151 virtual ~V4L2VP8Accelerator() {}
152
153 // VP8Decoder::VP8Accelerator implementation.
154 scoped_refptr<VP8Picture> CreateVP8Picture() override;
155
156 bool SubmitDecode(const scoped_refptr<VP8Picture>& pic,
157 const media::Vp8FrameHeader* frame_hdr,
158 const scoped_refptr<VP8Picture>& last_frame,
159 const scoped_refptr<VP8Picture>& golden_frame,
160 const scoped_refptr<VP8Picture>& alt_frame) override;
161
162 bool OutputPicture(const scoped_refptr<VP8Picture>& pic) override;
163
164 private:
165 scoped_refptr<V4L2DecodeSurface> VP8PictureToV4L2DecodeSurface(
166 const scoped_refptr<VP8Picture>& pic);
167
168 V4L2SliceVideoDecodeAccelerator* v4l2_dec_;
169
170 DISALLOW_COPY_AND_ASSIGN(V4L2VP8Accelerator);
171 };
172
173 // Record for input buffers.
174 struct InputRecord {
175 InputRecord();
176 int32 input_id;
177 void* address;
178 size_t length;
179 size_t bytes_used;
180 bool at_device;
181 };
182
183 // Record for output buffers.
184 struct OutputRecord {
185 OutputRecord();
186 bool at_device;
187 bool at_client;
188 int32 picture_id;
189 EGLImageKHR egl_image;
190 EGLSyncKHR egl_sync;
191 bool cleared;
192 };
193
194 // See http://crbug.com/255116.
195 // Input bitstream buffer size for up to 1080p streams.
196 const size_t kInputBufferMaxSizeFor1080p = 1024 * 1024;
197 // Input bitstream buffer size for up to 4k streams.
198 const size_t kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p;
199 const size_t kNumInputBuffers = 16;
200
201 //
202 // Below methods are used by accelerator implementations.
203 //
204 // Append slice data in |data| of size |size| to pending hardware
205 // input buffer with |index|. This buffer will be submitted for decode
206 // on the next DecodeSurface(). Return true on success.
207 bool SubmitSlice(int index, const uint8_t* data, size_t size);
208
209 // Submit controls in |ext_ctrls| to hardware. Return true on success.
210 bool SubmitExtControls(struct v4l2_ext_controls* ext_ctrls);
211
212 // Decode of |dec_surface| is ready to be submitted and all codec-specific
213 // settings are set in hardware.
214 void DecodeSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
215
216 // |dec_surface| is ready to be outputted once decode is finished.
217 // This can be called before decode is actually done in hardware, and this
218 // method is responsible for maintaining the ordering, i.e. the surfaces will
219 // be outputted in the same order as SurfaceReady calls. To do so, the
220 // surfaces are put on decoder_display_queue_ and sent to output in that
221 // order once all preceding surfaces are sent.
222 void SurfaceReady(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
223
224 //
225 // Internal methods of this class.
226 //
227 // Recycle V4L2 output buffer with |index|. Used as surface release callback.
228 void ReuseOutputBuffer(int index);
229
230 // Queue a |dec_surface| to device for decoding.
231 void Enqueue(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
232
233 // Dequeue any V4L2 buffers available and process.
234 void Dequeue();
235
236 // V4L2 QBUF helpers.
237 bool EnqueueInputRecord(int index, uint32_t config_store);
238 bool EnqueueOutputRecord(int index);
239
240 // Set input and output formats in hardware.
241 bool SetupFormats();
242
243 // Create input and output buffers.
244 bool CreateInputBuffers();
245 bool CreateOutputBuffers();
246
247 // Destroy input buffers.
248 void DestroyInputBuffers();
249
250 // Destroy output buffers and release associated resources (textures,
251 // EGLImages). If |dismiss| is true, also dismiss the associated
252 // PictureBuffers.
253 bool DestroyOutputs(bool dismiss);
254
255 // Used by DestroyOutputs.
256 bool DestroyOutputBuffers();
257
258 // Dismiss all |picture_buffer_ids| via Client::DismissPictureBuffer()
259 // and signal |done| after finishing.
260 void DismissPictures(std::vector<int32> picture_buffer_ids,
261 base::WaitableEvent* done);
262
263 // Task to finish initialization on decoder_thread_.
264 void InitializeTask();
265
266 // Surface set change (resolution change) flow.
267 // If we have no surfaces allocated, just allocate them and return.
268 // Otherwise mark us as pending for surface set change.
269 void InitiateSurfaceSetChange();
270 // If a surface set change is pending and we are ready, stop the device,
271 // destroy outputs, releasing resources and dismissing pictures as required,
272 // followed by allocating a new set for the new resolution/DPB size
273 // as provided by decoder. Finally, try to resume decoding.
274 void FinishSurfaceSetChangeIfNeeded();
275
276 void NotifyError(Error error);
277 void DestroyTask();
278
279 // Sets the state to kError and notifies client if needed.
280 void SetErrorState(Error error);
281
282 // Flush flow when requested by client.
283 // When Flush() is called, it posts a FlushTask, which checks the input queue.
284 // If nothing is pending for decode on decoder_input_queue_, we call
285 // InitiateFlush() directly. Otherwise, we push a dummy BitstreamBufferRef
286 // onto the decoder_input_queue_ to schedule a flush. When we reach it later
287 // on, we call InitiateFlush() to perform it at the correct time.
288 void FlushTask();
289 // Tell the decoder to flush all frames, reset it and mark us as scheduled
290 // for flush, so that we can finish it once all pending decodes are finished.
291 void InitiateFlush();
292 // If all pending frames are decoded and we are waiting to flush, perform it.
293 // This will send all pending pictures to client, notify the client that
294 // flush is complete and put us in a state ready to resume.
295 void FinishFlushIfNeeded();
296
297 // Reset flow when requested by client.
298 // Drop all inputs and reset the decoder and mark us as pending for reset.
299 void ResetTask();
300 // If all pending frames are decoded and we are waiting to reset, perform it.
301 // This drops all pending outputs (client is not interested anymore),
302 // notifies the client we are done and puts us in a state ready to resume.
303 void FinishResetIfNeeded();
304
305 // Process pending events if any.
306 void ProcessPendingEventsIfNeeded();
307
308 // Performed on decoder_thread_ as a consequence of poll() on
309 // device_poll_thread_ returning an event.
310 void ServiceDeviceTask();
311
312 // Schedule poll if we have any buffers queued and the poll thread
313 // is not stopped (on surface set change).
314 void SchedulePollIfNeeded();
315
316 // Attempt to start/stop device_poll_thread_.
317 bool StartDevicePoll();
318 bool StopDevicePoll(bool keep_input_state);
319
320 // Run on device_poll_thread_ to wait for device events.
321 void DevicePollTask(bool poll_device);
322
323 enum State {
324 // We are in this state until Initialize() returns successfully.
325 // We can't post errors to the client in this state yet.
326 kUninitialized,
327 // Initialize() returned successfully.
328 kInitialized,
329 // This state allows making progress decoding more input stream.
330 kDecoding,
331 // Transitional state when we are not decoding any more stream, but are
332 // performing flush, reset, resolution change or are destroying ourselves.
333 kIdle,
334 // Error state, set when sending NotifyError to client.
335 kError,
336 };
337
338 // Buffer id for flush buffer, queued by FlushTask().
339 const int kFlushBufferId = -1;
kcwu 2015/01/12 13:22:01 I'm confused. IIUC, you already used -1 for someth
Pawel Osciak 2015/01/13 11:33:35 I'm confused what happened here too. Sorry.
340
341 // Handler for Decode() on decoder_thread_.
342 void DecodeTask(const media::BitstreamBuffer& bitstream_buffer);
343
344 // Schedule a new DecodeBufferTask if we are decoding.
345 void ScheduleDecodeBufferTaskIfNeeded();
346
347 // Main decoder loop. Keep decoding the current buffer in decoder_, asking
348 // for more stream via TrySetNewBistreamBuffer() if decoder_ requests so,
349 // and handle other returns from it appropriately.
350 void DecodeBufferTask();
351
352 // Check decoder_input_queue_ for any available buffers to decode and
353 // set the decoder_current_bitstream_buffer_ to the next buffer if one is
354 // available, taking it off the queue. Also set the current stream pointer
355 // in decoder_, and return true.
356 // Return false if no buffers are pending on decoder_input_queue_.
357 bool TrySetNewBistreamBuffer();
358
359 // Auto-destruction reference for EGLSync (for message-passing).
360 struct EGLSyncKHRRef;
361 void ReusePictureBufferTask(int32 picture_buffer_id,
362 scoped_ptr<EGLSyncKHRRef> egl_sync_ref);
363
364 // Called to actually send |dec_surface| to the client after it is decoded,
365 // preserving the order in which it was scheduled via SurfaceReady().
366 void OutputSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
367
368 // Goes over the |decoder_display_queue_| and sends all buffers from the
369 // front of the queue that are already decoded to the client, in order.
370 void TryOutputSurfaces();
371
372 // Creates a new decode surface or returns nullptr if one is not available.
373 scoped_refptr<V4L2DecodeSurface> CreateSurface();
374
375 // Send decoded pictures to PictureReady.
376 void SendPictureReady();
377
378 // Callback that indicates a picture has been cleared.
379 void PictureCleared();
380
381 size_t input_planes_count_;
382 size_t output_planes_count_;
383
384 // GPU Child thread message loop.
385 const scoped_refptr<base::MessageLoopProxy> child_message_loop_proxy_;
386
387 // IO thread message loop.
388 scoped_refptr<base::MessageLoopProxy> io_message_loop_proxy_;
389
390 // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
391 // device worker threads back to the child thread.
392 base::WeakPtr<V4L2SliceVideoDecodeAccelerator> weak_this_;
393
394 // To expose client callbacks from VideoDecodeAccelerator.
395 // NOTE: all calls to these objects *MUST* be executed on
396 // child_message_loop_proxy_.
397 scoped_ptr<base::WeakPtrFactory<VideoDecodeAccelerator::Client>>
398 client_ptr_factory_;
399 base::WeakPtr<VideoDecodeAccelerator::Client> client_;
400 // Callbacks to |io_client_| must be executed on |io_message_loop_proxy_|.
401 base::WeakPtr<Client> io_client_;
402
403 // V4L2 device in use.
404 scoped_refptr<V4L2Device> device_;
405
406 // Thread to communicate with the device on.
407 base::Thread decoder_thread_;
408 scoped_refptr<base::MessageLoopProxy> decoder_thread_proxy_;
409
410 // Thread used to poll the device for events.
411 base::Thread device_poll_thread_;
412
413 // Input queue state.
414 bool input_streamon_;
415 // Number of input buffers enqueued to the device.
416 int input_buffer_queued_count_;
417 // Input buffers ready to use; LIFO since we don't care about ordering.
418 std::list<int> free_input_buffers_;
419 // Mapping of int index to an input buffer record.
420 std::vector<InputRecord> input_buffer_map_;
421
422 // Output queue state.
423 bool output_streamon_;
424 // Number of output buffers enqueued to the device.
425 int output_buffer_queued_count_;
426 // Output buffers ready to use.
427 std::list<int> free_output_buffers_;
428 // Mapping of int index to an output buffer record.
429 std::vector<OutputRecord> output_buffer_map_;
430
431 media::VideoCodecProfile video_profile_;
432 uint32_t output_format_fourcc_;
433 gfx::Size frame_buffer_size_;
434 size_t output_dpb_size_;
435
436 struct BitstreamBufferRef;
437 // Input queue of stream buffers coming from the client.
438 std::queue<linked_ptr<BitstreamBufferRef>> decoder_input_queue_;
439 // BitstreamBuffer currently being processed.
440 scoped_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;
441
442 // Queue storing decode surfaces ready to be output as soon as they are
443 // decoded. The surfaces must be output in the order they are queued.
444 std::queue<scoped_refptr<V4L2DecodeSurface>> decoder_display_queue_;
445
446 // Decoder state.
447 State state_;
448
449 // If any of these are true, we are waiting for the device to finish decoding
450 // all previously-queued frames, so we can finish the flush/reset/surface
451 // change flows. These can stack.
452 bool decoder_flushing_;
453 bool decoder_resetting_;
454 bool surface_set_change_pending_;
455
456 // Hardware accelerators.
457 // TODO(posciak): Try to have a superclass here if possible.
458 scoped_ptr<V4L2H264Accelerator> h264_accelerator_;
459 scoped_ptr<V4L2VP8Accelerator> vp8_accelerator_;
460
461 // Codec-specific software decoder in use.
462 scoped_ptr<AcceleratedVideoDecoder> decoder_;
463
464 // Surfaces queued to device to keep references to them while decoded.
465 using V4L2DecodeSurfaceByOutputId =
466 std::map<int, scoped_refptr<V4L2DecodeSurface>>;
467 V4L2DecodeSurfaceByOutputId surfaces_at_device_;
468
469 // Surfaces sent to client to keep references to them while displayed.
470 using V4L2DecodeSurfaceByPictureBufferId =
471 std::map<int32, scoped_refptr<V4L2DecodeSurface>>;
472 V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_;
473
474 // Record for decoded pictures that can be sent to PictureReady.
475 struct PictureRecord;
476 // Pictures that are ready but not sent to PictureReady yet.
477 std::queue<PictureRecord> pending_picture_ready_;
478
479 // The number of pictures that are sent to PictureReady and will be cleared.
480 int picture_clearing_count_;
481
482 // Used by the decoder thread to wait for AssignPictureBuffers to arrive
483 // to avoid races with potential Reset requests.
484 base::WaitableEvent pictures_assigned_;
485
486 // Make the GL context current callback.
487 base::Callback<bool(void)> make_context_current_;
488
489 // EGL state
490 EGLDisplay egl_display_;
491 EGLContext egl_context_;
492
493 // The WeakPtrFactory for |weak_this_|.
494 base::WeakPtrFactory<V4L2SliceVideoDecodeAccelerator> weak_this_factory_;
495
496 DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator);
497 };
498
499 // Codec-specific subclasses of software decoder picture classes.
500 // This allows us to keep decoders oblivious of our implementation details.
501 class V4L2H264Picture : public H264Picture {
scherkus (not reviewing) 2015/01/13 01:25:00 ditto for .cc
Pawel Osciak 2015/01/13 11:33:35 Done.
502 public:
503 V4L2H264Picture(const scoped_refptr<
504 V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>& dec_surface)
505 : dec_surface_(dec_surface) {}
506 virtual ~V4L2H264Picture() {}
507
508 V4L2H264Picture* AsV4L2H264Picture() override { return this; }
509 scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>
510 dec_surface() {
511 return dec_surface_;
512 }
513
514 private:
515 scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>
516 dec_surface_;
517
518 DISALLOW_COPY_AND_ASSIGN(V4L2H264Picture);
519 };
520
521 class V4L2VP8Picture : public VP8Picture {
scherkus (not reviewing) 2015/01/13 01:25:00 ditto for .cc
Pawel Osciak 2015/01/13 11:33:35 Done.
522 public:
523 V4L2VP8Picture(const scoped_refptr<
524 V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>& dec_surface)
525 : dec_surface_(dec_surface) {}
526 virtual ~V4L2VP8Picture() {}
527
528 V4L2VP8Picture* AsV4L2VP8Picture() override { return this; }
529 scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>
530 dec_surface() {
531 return dec_surface_;
532 }
533
534 private:
535 scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>
536 dec_surface_;
537
538 DISALLOW_COPY_AND_ASSIGN(V4L2VP8Picture);
539 };
540
541 } // namespace content
542
543 #endif // CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698