Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(113)

Side by Side Diff: content/common/gpu/media/exynos_video_decode_accelerator.h

Issue 11198060: VDA implementation for Exynos, using V4L2 (Closed) Base URL: https://git.chromium.org/git/chromium/src@git-svn
Patch Set: content:: fixes from piman@. Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // This file contains an implementation of VideoDecoderAccelerator
6 // that utilizes the hardware video decoder present on the Exynos SoC.
7
8 #ifndef CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_
9 #define CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_
10
11 #include <list>
12 #include <vector>
13
14 #include "base/callback_forward.h"
15 #include "base/memory/linked_ptr.h"
16 #include "base/memory/scoped_ptr.h"
17 #include "base/threading/thread.h"
18 #include "content/common/content_export.h"
19 #include "media/base/video_decoder_config.h"
20 #include "media/video/video_decode_accelerator.h"
21 #include "third_party/angle/include/EGL/egl.h"
22 #include "third_party/angle/include/EGL/eglext.h"
23 #include "ui/gfx/size.h"
24
25 namespace base {
26 class MessageLoopProxy;
27 }
28
29 namespace content {
30 class H264Parser;
31
32 // This class handles Exynos video acceleration directly through the V4L2
33 // devices exported by the Multi Format Codec and GScaler hardware blocks.
34 //
35 // The threading model of this class is driven by the fact that it needs to
36 // interface two fundamentally different event queues -- the one Chromium
37 // provides through MessageLoop, and the one driven by the V4L2 devices which
38 // is waited on with epoll(). There are three threads involved in this class:
39 //
40 // * The child thread, which is the main GPU process thread which calls the
41 // media::VideoDecodeAccelerator entry points. Calls from this thread
42 // generally do not block (with the exception of Initialize() and Destroy()).
43 // They post tasks to the decoder_thread_, which actually services the task
44 // and calls back when complete through the
45 // media::VideoDecodeAccelerator::Client interface.
46 // * The decoder_thread_, owned by this class. It services API tasks, through
47 // the *Task() routines, as well as V4L2 device events, through
48 // ServiceDeviceTask(). Almost all state modification is done on this thread.
49 // * The device_poll_thread_, owned by this class. All it does is epoll() on
50 // the V4L2 devices in DevicePollTask() and schedule a ServiceDeviceTask() on the
51 // decoder_thread_ when something interesting happens.
52 // TODO(sheu): replace this thread with a TYPE_IO decoder_thread_.
53 //
54 // Note that this class has no locks! Everything's serviced on the
55 // decoder_thread_, so there are no synchronization issues.
56 // ... well, there are, but it's a matter of getting messages posted in the
57 // right order, not fiddling with locks.
58 class CONTENT_EXPORT ExynosVideoDecodeAccelerator :
59 public media::VideoDecodeAccelerator {
60 public:
61 ExynosVideoDecodeAccelerator(
62 EGLDisplay egl_display,
63 EGLContext egl_context,
64 Client* client,
65 const base::Callback<bool(void)>& make_context_current);
66 virtual ~ExynosVideoDecodeAccelerator();
67
68 // media::VideoDecodeAccelerator implementation.
69 // Note: Initialize() and Destroy() are synchronous.
70 virtual bool Initialize(media::VideoCodecProfile profile) OVERRIDE;
71 virtual void Decode(const media::BitstreamBuffer& bitstream_buffer) OVERRIDE;
72 virtual void AssignPictureBuffers(
73 const std::vector<media::PictureBuffer>& buffers) OVERRIDE;
74 virtual void ReusePictureBuffer(int32 picture_buffer_id) OVERRIDE;
75 virtual void Flush() OVERRIDE;
76 virtual void Reset() OVERRIDE;
77 virtual void Destroy() OVERRIDE;
78
79 // Do any necessary initialization before the sandbox is enabled.
80 static void PreSandboxInitialization();
81
82 // Lazily initialize static data after sandbox is enabled. Return false on
83 // init failure.
84 static bool PostSandboxInitialization();
85
86 private:
87 // These are rather subjectively tuned.
88 enum {
89 kMfcInputBufferCount = 8,
90 kMfcOutputBufferExtraCount = 5, // extra buffers beyond the number requested by V4L2.
91 kMfcInputBufferMaxSize = 512 * 1024,
92 kGscInputBufferCount = 6,
93 kGscOutputBufferCount = 6,
94 };
95
96 // Internal state of the decoder.
97 enum State {
98 kUninitialized, // Initialize() not yet called.
99 kInitialized, // Initialize() returned true; ready to start decoding.
100 kDecoding, // DecodeBufferInitial() successful; decoding frames.
101 kResetting, // Presently resetting.
102 kAfterReset, // After Reset(), ready to start decoding again.
103 kError, // Error in kDecoding state.
104 };
105
106 enum BufferId {
107 kFlushBufferId = -2 // Buffer id for flush buffer, queued by FlushTask().
108 };
109
110 // File descriptors we need to poll.
// The values are distinct bits, so they can be OR-ed together into the
// |poll_fds| mask taken by DevicePollTask().
111 enum PollFds {
112 kPollMfc = (1 << 0),
113 kPollGsc = (1 << 1),
114 };
115
116 // Auto-destruction reference for BitstreamBuffer, for message-passing from
117 // Decode() to DecodeTask().
118 struct BitstreamBufferRef;
119
120 // Auto-destruction reference for an array of PictureBuffer, for
121 // message-passing from AssignPictureBuffers() to AssignPictureBuffersTask().
122 struct PictureBufferArrayRef;
123
124 // Auto-destruction reference for EGLSync (for message-passing).
125 struct EGLSyncKHRRef;
126
127 // Record for MFC input buffers.
128 struct MfcInputRecord {
129 MfcInputRecord();
130 ~MfcInputRecord();
131 bool at_device; // held by device.
132 void* address; // mmap() address.
133 size_t length; // mmap() length.
// NOTE(review): |bytes_used| is off_t here but size_t in MfcOutputRecord;
// confirm whether the signed type is intentional.
134 off_t bytes_used; // bytes filled in the mmap() segment.
135 int32 input_id; // triggering input_id as given to Decode().
136 };
137
138 // Record for MFC output buffers.
139 struct MfcOutputRecord {
140 MfcOutputRecord();
141 ~MfcOutputRecord();
142 bool at_device; // held by device.
143 size_t bytes_used[2]; // bytes used in each dmabuf.
144 void* address[2]; // mmap() address for each plane.
145 size_t length[2]; // mmap() length for each plane.
146 int32 input_id; // triggering input_id as given to Decode().
147 };
148
149 // Record for GSC input buffers.
150 struct GscInputRecord {
151 GscInputRecord();
152 ~GscInputRecord();
153 bool at_device; // held by device.
154 int mfc_output; // MFC output buffer index to recycle when this input
155 // is complete
156 };
157
158 // Record for GSC output buffers.
159 struct GscOutputRecord {
160 GscOutputRecord();
161 ~GscOutputRecord();
162 bool at_device; // held by device.
163 bool at_client; // held by client.
164 int fd; // file descriptor from backing EGLImage.
165 EGLImageKHR egl_image; // backing EGLImage.
166 EGLSyncKHR egl_sync; // sync the compositor's use of the EGLImage.
167 int32 picture_id; // picture buffer id as returned to PictureReady().
168 };
169
170 //
171 // Decoding tasks, to be run on decoder_thread_.
172 //
173
174 // Enqueue a BitstreamBuffer to decode. This will enqueue a buffer to the
175 // decoder_input_queue_, then queue a DecodeBufferTask() to actually decode
176 // the buffer.
177 void DecodeTask(scoped_ptr<BitstreamBufferRef> bitstream_record);
178
179 // Decode from the buffers queued in decoder_input_queue_. Calls
180 // DecodeBufferInitial() or DecodeBufferContinue() as appropriate.
181 void DecodeBufferTask();
182 // Find the extents of one frame fragment to push to HW.
183 bool FindFrameFragment(const uint8* data, size_t size, size_t* endpos);
184 // Schedule another DecodeBufferTask() if we're behind.
185 void ScheduleDecodeBufferTaskIfNeeded();
186
187 // Return true if we should continue to schedule DecodeBufferTask()s after
188 // completion. DecodeBufferInitial() also stores the amount of input actually
// consumed in |endpos|; DecodeBufferContinue() has no such out-parameter.
189 bool DecodeBufferInitial(const void* data, size_t size, size_t* endpos);
190 bool DecodeBufferContinue(const void* data, size_t size);
191
192 // Accumulate data for the next frame to decode. May return false in
193 // non-error conditions; for example when pipeline is full and should be
194 // retried later.
195 bool AppendToInputFrame(const void* data, size_t size);
196 // Flush data for one decoded frame.
197 bool FlushInputFrame();
198
199 // Process an AssignPictureBuffers() API call. After this, the
200 // device_poll_thread_ can be started safely, since we have all our
201 // buffers.
202 void AssignPictureBuffersTask(scoped_ptr<PictureBufferArrayRef> pic_buffers);
203
204 // Service I/O on the V4L2 devices. This task should only be scheduled from
205 // DevicePollTask().
206 void ServiceDeviceTask();
207 // Handle the various device queues.
208 void EnqueueMfc();
209 void DequeueMfc();
210 void EnqueueGsc();
211 void DequeueGsc();
212 // Enqueue a buffer on the corresponding queue.
213 bool EnqueueMfcInputRecord();
214 bool EnqueueMfcOutputRecord();
215 bool EnqueueGscInputRecord();
216 bool EnqueueGscOutputRecord();
217
218 // Process a ReusePictureBuffer() API call. The API call creates an EGLSync
219 // object on the main (GPU process) thread; we will record this object so we
220 // can wait on it before reusing the buffer.
221 void ReusePictureBufferTask(int32 picture_buffer_id,
222 scoped_ptr<EGLSyncKHRRef> egl_sync_ref);
223
224 // Flush() task. Child thread should not submit any more buffers until it
225 // receives the NotifyFlushDone callback. This task will schedule an empty
226 // BitstreamBufferRef (with input_id == kFlushBufferId) to perform the flush.
227 void FlushTask();
228 // Notify the client of a flush completion, if required. This should be
229 // called any time a relevant queue could potentially be emptied: see
230 // function definition.
231 void NotifyFlushDoneIfNeeded();
232
233 // Reset() task. This task will schedule a ResetDoneTask() that will send
234 // the NotifyResetDone callback, then set the decoder state to kResetting so
235 // that all intervening tasks will drain.
236 void ResetTask();
237 // ResetDoneTask() will set the decoder state back to kAfterReset, so
238 // subsequent decoding can continue.
239 void ResetDoneTask();
240
241 // Device destruction task.
242 void DestroyTask();
243
244 // Attempt to start/stop device_poll_thread_.
245 bool StartDevicePoll();
246 bool StopDevicePoll();
247 // Set/clear the device poll interrupt (using device_poll_interrupt_fd_).
248 bool SetDevicePollInterrupt();
249 bool ClearDevicePollInterrupt();
250
251 //
252 // Device tasks, to be run on device_poll_thread_.
253 //
254
255 // The device task.
// NOTE(review): |poll_fds| is presumably a bitmask of PollFds values
// (kPollMfc | kPollGsc) -- confirm against the definition.
256 void DevicePollTask(unsigned int poll_fds);
257
258 //
259 // Safe from any thread.
260 //
261
262 // Error notification (using PostTask() to child thread, if necessary).
263 void NotifyError(Error error);
264
265 // Set the decoder_thread_ state (using PostTask to decoder thread, if
266 // necessary).
267 void SetDecoderState(State state);
268
269 //
270 // Other utility functions. Called on decoder_thread_, unless
271 // decoder_thread_ is not yet started, in which case the child thread can call
272 // these (e.g. in Initialize() or Destroy()).
273 //
274
275 // Create the buffers we need.
276 bool CreateMfcInputBuffers();
277 bool CreateMfcOutputBuffers();
278 bool CreateGscInputBuffers();
279 bool CreateGscOutputBuffers();
280
281 // Destroy these buffers.
282 void DestroyMfcInputBuffers();
283 void DestroyMfcOutputBuffers();
284 void DestroyGscInputBuffers();
285 void DestroyGscOutputBuffers();
286
287 // Our original calling message loop for the child thread.
288 scoped_refptr<base::MessageLoopProxy> child_message_loop_proxy_;
289
290 // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
291 // device worker threads back to the child thread. Because the worker threads
292 // are members of this class, any task running on those threads is guaranteed
293 // that this object is still alive. As a result, tasks posted from the child
294 // thread to the decoder or device thread should use base::Unretained(this),
295 // and tasks posted the other way should use |weak_this_|.
// NOTE(review): base::WeakPtr and base::WeakPtrFactory are used below, but
// "base/memory/weak_ptr.h" is not among this header's includes; presumably it
// arrives transitively -- confirm and include it directly.
296 base::WeakPtr<ExynosVideoDecodeAccelerator> weak_this_;
297
298 // To expose client callbacks from VideoDecodeAccelerator.
299 // NOTE: all calls to these objects *MUST* be executed on
300 // child_message_loop_proxy_.
301 base::WeakPtrFactory<Client> client_ptr_factory_;
302 base::WeakPtr<Client> client_;
303
304 //
305 // Decoder state, owned and operated by decoder_thread_.
306 // Before decoder_thread_ has started, the decoder state is managed by
307 // the child (main) thread. After decoder_thread_ has started, the decoder
308 // thread should be the only one managing these.
309 //
310
311 // This thread services tasks posted from the VDA API entry points by the
312 // child thread and device service callbacks posted from the device thread.
313 base::Thread decoder_thread_;
314 // Decoder state machine state.
315 State decoder_state_;
316 // BitstreamBuffer we're presently reading.
317 scoped_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;
318 // FlushTask() and ResetTask() should not affect buffers that have been
319 // queued afterwards. For flushing or resetting the pipeline then, we will
320 // delay these buffers until after the flush or reset completes.
321 int decoder_delay_bitstream_buffer_id_;
322 // MFC input buffer we're presently filling.
323 int decoder_current_input_buffer_;
324 // We track the number of buffer decode tasks we have scheduled, since each
325 // task execution should complete one buffer. If we fall behind (due to
326 // resource backpressure, etc.), we'll have to schedule more to catch up.
327 int decoder_decode_buffer_tasks_scheduled_;
328 // Picture buffers held by the client.
329 int decoder_frames_at_client_;
330 // Are we flushing?
331 bool decoder_flushing_;
332 // Input queue for decoder_thread_: BitstreamBuffers in.
333 std::list<linked_ptr<BitstreamBufferRef> > decoder_input_queue_;
334 // For H264 decode, hardware requires that we send it frame-sized chunks.
335 // We'll need to parse the stream.
336 scoped_ptr<content::H264Parser> decoder_h264_parser_;
337
338 //
339 // Hardware state and associated queues. Since decoder_thread_ services
340 // the hardware, decoder_thread_ owns these too.
341 //
342
343 // Completed decode buffers, waiting for MFC.
344 std::list<int> mfc_input_ready_queue_;
345
346 // MFC decode device.
347 int mfc_fd_;
348
349 // MFC input buffer state.
350 bool mfc_input_streamon_;
351 // MFC input buffers, total.
352 int mfc_input_buffer_count_;
353 // MFC input buffers enqueued to device.
354 int mfc_input_buffer_queued_count_;
355 // Input buffers ready to use, as a LIFO since we don't care about ordering.
356 std::vector<int> mfc_free_input_buffers_;
357 // Mapping of int index to MFC input buffer record.
358 std::vector<MfcInputRecord> mfc_input_buffer_map_;
359
360 // MFC output buffer state.
361 bool mfc_output_streamon_;
362 // MFC output buffers, total.
363 int mfc_output_buffer_count_;
364 // MFC output buffers enqueued to device.
365 int mfc_output_buffer_queued_count_;
366 // Output buffers ready to use, as a LIFO since we don't care about ordering.
367 std::vector<int> mfc_free_output_buffers_;
368 // Mapping of int index to MFC output buffer record.
369 std::vector<MfcOutputRecord> mfc_output_buffer_map_;
370 // Required size of MFC output buffers. Two sizes for two planes.
371 size_t mfc_output_buffer_size_[2];
372 uint32 mfc_output_buffer_pixelformat_;
373
374 // Completed MFC outputs, waiting for GSC.
375 std::list<int> mfc_output_gsc_input_queue_;
376
377 // GSC decode device.
// NOTE(review): GSC is presumably the GScaler block named in the class
// comment, so "decode" looks copied from the mfc_fd_ comment -- confirm.
378 int gsc_fd_;
379
380 // GSC input buffer state.
381 bool gsc_input_streamon_;
382 // GSC input buffers, total.
383 int gsc_input_buffer_count_;
384 // GSC input buffers enqueued to device.
385 int gsc_input_buffer_queued_count_;
386 // Input buffers ready to use, as a LIFO since we don't care about ordering.
387 std::vector<int> gsc_free_input_buffers_;
388 // Mapping of int index to GSC input buffer record.
389 std::vector<GscInputRecord> gsc_input_buffer_map_;
390
391 // GSC output buffer state.
392 bool gsc_output_streamon_;
393 // GSC output buffers, total.
394 int gsc_output_buffer_count_;
395 // GSC output buffers enqueued to device.
396 int gsc_output_buffer_queued_count_;
397 // Output buffers ready to use. We need a FIFO here.
398 std::list<int> gsc_free_output_buffers_;
399 // Mapping of int index to GSC output buffer record.
400 std::vector<GscOutputRecord> gsc_output_buffer_map_;
401
402 // Output picture size.
403 gfx::Size frame_buffer_size_;
404
405 //
406 // The device polling thread handles notifications of V4L2 device changes.
407 //
408
409 // The thread.
410 base::Thread device_poll_thread_;
411 // eventfd fd to signal device poll thread when its poll() should be
412 // interrupted.
413 int device_poll_interrupt_fd_;
414
415 //
416 // Other state, held by the child (main) thread.
417 //
418
419 // Make our context current before running any EGL entry points.
420 base::Callback<bool(void)> make_context_current_;
421
422 // EGL state
423 EGLDisplay egl_display_;
424 EGLContext egl_context_;
425
426 // The codec we'll be decoding for.
427 media::VideoCodecProfile video_profile_;
428
429 DISALLOW_COPY_AND_ASSIGN(ExynosVideoDecodeAccelerator);
430 };
431
432 } // namespace content
433
434 #endif // CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698