OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 // | |
5 // This file contains an implementation of VideoDecodeAccelerator | |
6 // that utilizes the hardware video decoder present on the Exynos SoC. | |
7 | |
8 #ifndef CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_ | |
9 #define CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_ | |
10 | |
11 #include <list> | |
12 #include <vector> | |
13 | |
14 #include "base/callback_forward.h" | |
15 #include "base/memory/linked_ptr.h" | |
16 #include "base/memory/scoped_ptr.h" | |
17 #include "base/threading/thread.h" | |
18 #include "content/common/content_export.h" | |
19 #include "media/base/video_decoder_config.h" | |
20 #include "media/video/video_decode_accelerator.h" | |
21 #include "third_party/angle/include/EGL/egl.h" | |
22 #include "third_party/angle/include/EGL/eglext.h" | |
23 #include "ui/gfx/size.h" | |
24 | |
25 namespace base { | |
26 class MessageLoopProxy; | |
27 } | |
28 | |
29 namespace content { | |
30 class H264Parser; | |
31 | |
32 // This class handles Exynos video acceleration directly through the V4L2 | |
33 // devices exported by the Multi Format Codec and GScaler hardware blocks. | |
34 // | |
35 // The threading model of this class is driven by the fact that it needs to | |
36 // interface two fundamentally different event queues -- the one Chromium | |
37 // provides through MessageLoop, and the one driven by the V4L2 devices which | |
38 // is waited on with epoll(). There are three threads involved in this class: | |
piman
2013/01/12 03:24:58
note: you can register a fd into the MessageLoop (
sheu
2013/01/14 23:49:49
fischman@ suggested this previously. I will do th
| |
39 // | |
40 // * The child thread, which is the main GPU process thread which calls the | |
piman
2013/01/12 03:24:58
nit: we commonly use "main" thread to mean the Chi
sheu
2013/01/14 23:49:49
The terminology I picked up from the VAAPI decoder
| |
41 // media::VideoDecodeAccelerator entry points. Calls from this thread | |
42 // generally do not block (with the exception of Initialize() and Destroy()). | |
43 // They post tasks to the decoder_thread_, which actually services the task | |
44 // and calls back when complete through the | |
45 // media::VideoDecodeAccelerator::Client interface. | |
46 // * The decoder_thread_, owned by this class. It services API tasks, through | |
47 // the *Task() routines, as well as V4L2 device events, through | |
48 // ServiceDeviceTask(). Almost all state modification is done on this thread. | |
49 // * The device_poll_thread_, owned by this class. All it does is epoll() on | |
50 // the V4L2 devices in DevicePollTask() and schedule a ServiceDeviceTask() on | |
51 // the decoder_thread_ when something interesting happens. | |
52 // TODO(sheu): replace this thread with a TYPE_IO decoder_thread_. | |
53 // | |
54 // Note that this class has no locks! Everything's serviced on the | |
55 // decoder_thread_, so there are no synchronization issues. | |
56 // ... well, there are, but it's a matter of getting messages posted in the | |
57 // right order, not fiddling with locks. | |
piman
2013/01/12 03:24:58
Note: the last paragraph is basically the modus op
sheu
2013/01/14 23:49:49
Yeah, and I'm glad it's this way. It's not this w
| |
58 class CONTENT_EXPORT ExynosVideoDecodeAccelerator : | |
59 public media::VideoDecodeAccelerator { | |
60 public: | |
61 ExynosVideoDecodeAccelerator( | |
62 EGLDisplay egl_display, | |
63 EGLContext egl_context, | |
64 Client* client, | |
65 const base::Callback<bool(void)>& make_context_current); | |
66 virtual ~ExynosVideoDecodeAccelerator(); | |
67 | |
68 // media::VideoDecodeAccelerator implementation. | |
69 // Note: Initialize() and Destroy() are synchronous. | |
70 virtual bool Initialize(media::VideoCodecProfile profile) OVERRIDE; | |
71 virtual void Decode(const media::BitstreamBuffer& bitstream_buffer) OVERRIDE; | |
72 virtual void AssignPictureBuffers( | |
73 const std::vector<media::PictureBuffer>& buffers) OVERRIDE; | |
74 virtual void ReusePictureBuffer(int32 picture_buffer_id) OVERRIDE; | |
75 virtual void Flush() OVERRIDE; | |
76 virtual void Reset() OVERRIDE; | |
77 virtual void Destroy() OVERRIDE; | |
78 | |
79 // Do any necessary initialization before the sandbox is enabled. | |
80 static void PreSandboxInitialization(); | |
81 | |
82 // Lazily initialize static data after sandbox is enabled. Return false on | |
83 // init failure. | |
84 static bool PostSandboxInitialization(); | |
85 | |
86 private: | |
87 // These are rather subjectively tuned. | |
88 enum { | |
89 kMfcInputBufferCount = 8, | |
90 kMfcOutputBufferExtraCount = 5, // number of buffers above those requested by V4L2. | |
91 kMfcInputBufferMaxSize = 512 * 1024, | |
92 kGscInputBufferCount = 6, | |
93 kGscOutputBufferCount = 6, | |
94 }; | |
piman
2013/01/12 03:24:58
nit: we don't typically use anonymous enums with v
sheu
2013/01/14 23:49:49
It seems to be the convention with media files, so
| |
95 | |
96 // Internal state of the decoder. | |
97 enum State { | |
98 kUninitialized, // Initialize() not yet called. | |
99 kInitialized, // Initialize() returned true; ready to start decoding. | |
100 kDecoding, // DecodeBufferInitial() successful; decoding frames. | |
101 kResetting, // Presently resetting. | |
102 kAfterReset, // After Reset(), ready to start decoding again. | |
103 kError, // Error in kDecoding state. | |
104 }; | |
105 | |
106 enum BufferId { | |
107 kFlushBufferId = -2 // Buffer id for flush buffer, queued by FlushTask(). | |
108 }; | |
109 | |
110 // File descriptors we need to poll. | |
111 enum PollFds { | |
112 kPollMfc = (1 << 0), | |
113 kPollGsc = (1 << 1), | |
114 }; | |
115 | |
116 // Auto-destruction reference for BitstreamBuffer, for message-passing from | |
117 // Decode() to DecodeTask(). | |
118 struct BitstreamBufferRef; | |
119 | |
120 // Auto-destruction reference for an array of PictureBuffer, for | |
121 // message-passing from AssignPictureBuffers() to AssignPictureBuffersTask(). | |
122 struct PictureBufferArrayRef; | |
123 | |
124 // Auto-destruction reference for EGLSync (for message-passing). | |
125 struct EGLSyncKHRRef; | |
126 | |
127 // Record for MFC input buffers. | |
128 struct MfcInputRecord { | |
129 MfcInputRecord(); | |
130 ~MfcInputRecord(); | |
131 bool at_device; // held by device. | |
132 void* address; // mmap() address. | |
133 size_t length; // mmap() length. | |
134 off_t bytes_used; // bytes filled in the mmap() segment. | |
135 int32 input_id; // triggering input_id as given to Decode(). | |
136 }; | |
137 | |
138 // Record for MFC output buffers. | |
139 struct MfcOutputRecord { | |
140 MfcOutputRecord(); | |
141 ~MfcOutputRecord(); | |
142 bool at_device; // held by device. | |
143 size_t bytes_used[2]; // bytes used in each dmabuf. | |
144 void* address[2]; // mmap() address for each plane. | |
145 size_t length[2]; // mmap() length for each plane. | |
146 int32 input_id; // triggering input_id as given to Decode(). | |
147 }; | |
148 | |
149 // Record for GSC input buffers. | |
150 struct GscInputRecord { | |
151 GscInputRecord(); | |
152 ~GscInputRecord(); | |
153 bool at_device; // held by device. | |
154 int mfc_output; // MFC output buffer index to recycle when this input | |
155 // is complete | |
156 }; | |
157 | |
158 // Record for GSC output buffers. | |
159 struct GscOutputRecord { | |
160 GscOutputRecord(); | |
161 ~GscOutputRecord(); | |
162 bool at_device; // held by device. | |
163 bool at_client; // held by client. | |
164 int fd; // file descriptor from backing EGLImage. | |
165 EGLImageKHR egl_image; // backing EGLImage. | |
166 EGLSyncKHR egl_sync; // sync the compositor's use of the EGLImage. | |
167 int32 picture_id; // picture buffer id as returned to PictureReady(). | |
168 }; | |
169 | |
170 // | |
171 // Decoding tasks, to be run on decoder_thread_. | |
172 // | |
173 | |
174 // Enqueue a BitstreamBuffer to decode. This will enqueue a buffer to the | |
175 // decoder_input_queue_, then queue a DecodeBufferTask() to actually decode | |
176 // the buffer. | |
177 void DecodeTask(scoped_ptr<BitstreamBufferRef> bitstream_record); | |
178 | |
179 // Decode from the buffers queued in decoder_input_queue_. Calls | |
180 // DecodeBufferInitial() or DecodeBufferContinue() as appropriate. | |
181 void DecodeBufferTask(); | |
182 // Find the extents of one frame fragment to push to HW. | |
183 bool FindFrameFragment(const uint8* data, size_t size, size_t* endpos); | |
184 // Schedule another DecodeBufferTask() if we're behind. | |
185 void ScheduleDecodeBufferTaskIfNeeded(); | |
186 | |
187 // Return true if we should continue to schedule DecodeBufferTask()s after | |
188 // completion. Store the amount of input actually consumed in |endpos|. | |
189 bool DecodeBufferInitial(const void* data, size_t size, size_t* endpos); | |
190 bool DecodeBufferContinue(const void* data, size_t size); | |
191 | |
192 // Accumulate data for the next frame to decode. May return false in | |
193 // non-error conditions; for example when pipeline is full and should be | |
194 // retried later. | |
195 bool AppendToInputFrame(const void* data, size_t size); | |
196 // Flush data for one decoded frame. | |
197 bool FlushInputFrame(); | |
198 | |
199 // Process an AssignPictureBuffers() API call. After this, the | |
200 // device_poll_thread_ can be started safely, since we have all our | |
201 // buffers. | |
202 void AssignPictureBuffersTask(scoped_ptr<PictureBufferArrayRef> pic_buffers); | |
203 | |
204 // Service I/O on the V4L2 devices. This task should only be scheduled from | |
205 // DevicePollTask(). | |
206 void ServiceDeviceTask(); | |
207 // Handle the various device queues. | |
208 void EnqueueMfc(); | |
209 void DequeueMfc(); | |
210 void EnqueueGsc(); | |
211 void DequeueGsc(); | |
212 // Enqueue a buffer on the corresponding queue. | |
213 bool EnqueueMfcInputRecord(); | |
214 bool EnqueueMfcOutputRecord(); | |
215 bool EnqueueGscInputRecord(); | |
216 bool EnqueueGscOutputRecord(); | |
217 | |
218 // Process a ReusePictureBuffer() API call. The API call creates an EGLSync | |
219 // object on the main (GPU process) thread; we will record this object so we | |
220 // can wait on it before reusing the buffer. | |
221 void ReusePictureBufferTask(int32 picture_buffer_id, | |
222 scoped_ptr<EGLSyncKHRRef> egl_sync_ref); | |
223 | |
224 // Flush() task. Child thread should not submit any more buffers until it | |
225 // receives the NotifyFlushDone callback. This task will schedule an empty | |
226 // BitstreamBufferRef (with input_id == kFlushBufferId) to perform the flush. | |
227 void FlushTask(); | |
228 // Notify the client of a flush completion, if required. This should be | |
229 // called any time a relevant queue could potentially be emptied: see | |
230 // function definition. | |
231 void NotifyFlushDoneIfNeeded(); | |
232 | |
233 // Reset() task. This task will schedule a ResetDoneTask() that will send | |
234 // the NotifyResetDone callback, then set the decoder state to kResetting so | |
235 // that all intervening tasks will drain. | |
236 void ResetTask(); | |
237 // ResetDoneTask() will set the decoder state back to kAfterReset, so | |
238 // subsequent decoding can continue. | |
239 void ResetDoneTask(); | |
240 | |
241 // Device destruction task. | |
242 void DestroyTask(); | |
243 | |
244 // Attempt to start/stop device_poll_thread_. | |
245 bool StartDevicePoll(); | |
246 bool StopDevicePoll(); | |
247 // Set/clear the device poll interrupt (using device_poll_interrupt_fd_). | |
248 bool SetDevicePollInterrupt(); | |
249 bool ClearDevicePollInterrupt(); | |
250 | |
251 // | |
252 // Device tasks, to be run on device_poll_thread_. | |
253 // | |
254 | |
255 // The device task. | |
256 void DevicePollTask(unsigned int poll_fds); | |
257 | |
258 // | |
259 // Safe from any thread. | |
260 // | |
261 | |
262 // Error notification (using PostTask() to child thread, if necessary). | |
263 void NotifyError(Error error); | |
264 | |
265 // Set the decoder_thread_ state (using PostTask to decoder thread, if | |
266 // necessary). | |
267 void SetDecoderState(State state); | |
268 | |
269 // | |
270 // Other utility functions. Called on decoder_thread_, unless | |
271 // decoder_thread_ is not yet started, in which case the child thread can call | |
272 // these (e.g. in Initialize() or Destroy()). | |
273 // | |
274 | |
275 // Create the buffers we need. | |
276 bool CreateMfcInputBuffers(); | |
277 bool CreateMfcOutputBuffers(); | |
278 bool CreateGscInputBuffers(); | |
279 bool CreateGscOutputBuffers(); | |
280 | |
281 // Destroy these buffers. | |
282 void DestroyMfcInputBuffers(); | |
283 void DestroyMfcOutputBuffers(); | |
284 void DestroyGscInputBuffers(); | |
285 void DestroyGscOutputBuffers(); | |
286 | |
287 // Our original calling message loop for the child thread. | |
288 scoped_refptr<base::MessageLoopProxy> child_message_loop_proxy_; | |
289 | |
290 // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or | |
291 // device worker threads back to the child thread. Because the worker threads | |
292 // are members of this class, any task running on those threads is guaranteed | |
293 // that this object is still alive. As a result, tasks posted from the child | |
294 // thread to the decoder or device thread should use base::Unretained(this), | |
295 // and tasks posted the other way should use |weak_this_|. | |
296 base::WeakPtr<ExynosVideoDecodeAccelerator> weak_this_; | |
297 | |
298 // To expose client callbacks from VideoDecodeAccelerator. | |
299 // NOTE: all calls to these objects *MUST* be executed on | |
300 // child_message_loop_proxy_. | |
301 base::WeakPtrFactory<Client> client_ptr_factory_; | |
302 base::WeakPtr<Client> client_; | |
303 | |
304 // | |
305 // Decoder state, owned and operated by decoder_thread_. | |
306 // Before decoder_thread_ has started, the decoder state is managed by | |
307 // the child (main) thread. After decoder_thread_ has started, the decoder | |
308 // thread should be the only one managing these. | |
309 // | |
310 | |
311 // This thread services tasks posted from the VDA API entry points by the | |
312 // child thread and device service callbacks posted from the device thread. | |
piman
2013/01/12 03:24:58
This class is pretty big. It could be nice to sepa
sheu
2013/01/14 23:49:49
If/when this becomes a general V4L2 video decoder
| |
313 base::Thread decoder_thread_; | |
314 // Decoder state machine state. | |
315 State decoder_state_; | |
316 // BitstreamBuffer we're presently reading. | |
317 scoped_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_; | |
318 // FlushTask() and ResetTask() should not affect buffers that have been | |
319 // queued afterwards. For flushing or resetting the pipeline then, we will | |
320 // delay these buffers until after the flush or reset completes. | |
321 int decoder_delay_bitstream_buffer_id_; | |
322 // MFC input buffer we're presently filling. | |
323 int decoder_current_input_buffer_; | |
324 // We track the number of buffer decode tasks we have scheduled, since each | |
325 // task execution should complete one buffer. If we fall behind (due to | |
326 // resource backpressure, etc.), we'll have to schedule more to catch up. | |
327 int decoder_decode_buffer_tasks_scheduled_; | |
328 // Picture buffers held by the client. | |
329 int decoder_frames_at_client_; | |
330 // Are we flushing? | |
331 bool decoder_flushing_; | |
332 // Input queue for decoder_thread_: BitstreamBuffers in. | |
333 std::list<linked_ptr<BitstreamBufferRef> > decoder_input_queue_; | |
334 // For H264 decode, hardware requires that we send it frame-sized chunks. | |
335 // We'll need to parse the stream. | |
336 scoped_ptr<content::H264Parser> decoder_h264_parser_; | |
337 | |
338 // | |
339 // Hardware state and associated queues. Since decoder_thread_ services | |
340 // the hardware, decoder_thread_ owns these too. | |
341 // | |
342 | |
343 // Completed decode buffers, waiting for MFC. | |
344 std::list<int> mfc_input_ready_queue_; | |
345 | |
346 // MFC decode device. | |
347 int mfc_fd_; | |
348 | |
349 // MFC input buffer state. | |
350 bool mfc_input_streamon_; | |
351 // MFC input buffers, total. | |
352 int mfc_input_buffer_count_; | |
353 // MFC input buffers enqueued to device. | |
354 int mfc_input_buffer_queued_count_; | |
355 // Input buffers ready to use, as a LIFO since we don't care about ordering. | |
356 std::vector<int> mfc_free_input_buffers_; | |
357 // Mapping of int index to MFC input buffer record. | |
358 std::vector<MfcInputRecord> mfc_input_buffer_map_; | |
359 | |
360 // MFC output buffer state. | |
361 bool mfc_output_streamon_; | |
362 // MFC output buffers, total. | |
363 int mfc_output_buffer_count_; | |
364 // MFC output buffers enqueued to device. | |
365 int mfc_output_buffer_queued_count_; | |
366 // Output buffers ready to use, as a LIFO since we don't care about ordering. | |
367 std::vector<int> mfc_free_output_buffers_; | |
368 // Mapping of int index to MFC output buffer record. | |
369 std::vector<MfcOutputRecord> mfc_output_buffer_map_; | |
370 // Required size of MFC output buffers. Two sizes for two planes. | |
371 size_t mfc_output_buffer_size_[2]; | |
372 uint32 mfc_output_buffer_pixelformat_; | |
373 | |
374 // Completed MFC outputs, waiting for GSC. | |
375 std::list<int> mfc_output_gsc_input_queue_; | |
376 | |
377 // GSC decode device. | |
378 int gsc_fd_; | |
379 | |
380 // GSC input buffer state. | |
381 bool gsc_input_streamon_; | |
382 // GSC input buffers, total. | |
383 int gsc_input_buffer_count_; | |
384 // GSC input buffers enqueued to device. | |
385 int gsc_input_buffer_queued_count_; | |
386 // Input buffers ready to use, as a LIFO since we don't care about ordering. | |
387 std::vector<int> gsc_free_input_buffers_; | |
388 // Mapping of int index to GSC input buffer record. | |
389 std::vector<GscInputRecord> gsc_input_buffer_map_; | |
390 | |
391 // GSC output buffer state. | |
392 bool gsc_output_streamon_; | |
393 // GSC output buffers, total. | |
394 int gsc_output_buffer_count_; | |
395 // GSC output buffers enqueued to device. | |
396 int gsc_output_buffer_queued_count_; | |
397 // Output buffers ready to use. We need a FIFO here. | |
398 std::list<int> gsc_free_output_buffers_; | |
399 // Mapping of int index to GSC output buffer record. | |
400 std::vector<GscOutputRecord> gsc_output_buffer_map_; | |
401 | |
402 // Output picture size. | |
403 gfx::Size frame_buffer_size_; | |
404 | |
405 // | |
406 // The device polling thread handles notifications of V4L2 device changes. | |
407 // | |
408 | |
409 // The thread. | |
410 base::Thread device_poll_thread_; | |
411 // eventfd fd to signal device poll thread when its poll() should be | |
412 // interrupted. | |
413 int device_poll_interrupt_fd_; | |
414 | |
415 // | |
416 // Other state, held by the child (main) thread. | |
417 // | |
418 | |
419 // Make our context current before running any EGL entry points. | |
420 base::Callback<bool(void)> make_context_current_; | |
421 | |
422 // EGL state | |
423 EGLDisplay egl_display_; | |
424 EGLContext egl_context_; | |
425 | |
426 // The codec we'll be decoding for. | |
427 media::VideoCodecProfile video_profile_; | |
428 | |
429 DISALLOW_COPY_AND_ASSIGN(ExynosVideoDecodeAccelerator); | |
430 }; | |
431 | |
432 } // namespace content | |
433 | |
434 #endif // CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_ | |
OLD | NEW |