OLD | NEW |
| (Empty) |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "webkit/renderer/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.h" | |
6 | |
7 #include <algorithm> | |
8 | |
9 #include "base/logging.h" | |
10 #include "media/base/audio_bus.h" | |
11 #include "media/base/audio_timestamp_helper.h" | |
12 #include "media/base/buffers.h" | |
13 #include "media/base/data_buffer.h" | |
14 #include "media/base/limits.h" | |
15 #include "webkit/renderer/media/crypto/ppapi/cdm/content_decryption_module.h" | |
16 | |
17 // Include FFmpeg header files. | |
18 extern "C" { | |
19 // Temporarily disable possible loss of data warning. | |
20 MSVC_PUSH_DISABLE_WARNING(4244); | |
21 #include <libavcodec/avcodec.h> | |
22 MSVC_POP_WARNING(); | |
23 } // extern "C" | |
24 | |
25 namespace webkit_media { | |
26 | |
// Upper bound on the channel count accepted by IsValidConfig(): the largest
// number of channels that has a defined layout in src/media.
namespace {
const int kMaxChannels = 8;
}  // namespace
29 | |
30 static AVCodecID CdmAudioCodecToCodecID( | |
31 cdm::AudioDecoderConfig::AudioCodec audio_codec) { | |
32 switch (audio_codec) { | |
33 case cdm::AudioDecoderConfig::kCodecVorbis: | |
34 return AV_CODEC_ID_VORBIS; | |
35 case cdm::AudioDecoderConfig::kCodecAac: | |
36 return AV_CODEC_ID_AAC; | |
37 case cdm::AudioDecoderConfig::kUnknownAudioCodec: | |
38 default: | |
39 NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec; | |
40 return AV_CODEC_ID_NONE; | |
41 } | |
42 } | |
43 | |
44 static void CdmAudioDecoderConfigToAVCodecContext( | |
45 const cdm::AudioDecoderConfig& config, | |
46 AVCodecContext* codec_context) { | |
47 codec_context->codec_type = AVMEDIA_TYPE_AUDIO; | |
48 codec_context->codec_id = CdmAudioCodecToCodecID(config.codec); | |
49 | |
50 switch (config.bits_per_channel) { | |
51 case 8: | |
52 codec_context->sample_fmt = AV_SAMPLE_FMT_U8; | |
53 break; | |
54 case 16: | |
55 codec_context->sample_fmt = AV_SAMPLE_FMT_S16; | |
56 break; | |
57 case 32: | |
58 codec_context->sample_fmt = AV_SAMPLE_FMT_S32; | |
59 break; | |
60 default: | |
61 DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits " | |
62 "per channel: " << config.bits_per_channel; | |
63 codec_context->sample_fmt = AV_SAMPLE_FMT_NONE; | |
64 } | |
65 | |
66 codec_context->channels = config.channel_count; | |
67 codec_context->sample_rate = config.samples_per_second; | |
68 | |
69 if (config.extra_data) { | |
70 codec_context->extradata_size = config.extra_data_size; | |
71 codec_context->extradata = reinterpret_cast<uint8_t*>( | |
72 av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE)); | |
73 memcpy(codec_context->extradata, config.extra_data, | |
74 config.extra_data_size); | |
75 memset(codec_context->extradata + config.extra_data_size, '\0', | |
76 FF_INPUT_BUFFER_PADDING_SIZE); | |
77 } else { | |
78 codec_context->extradata = NULL; | |
79 codec_context->extradata_size = 0; | |
80 } | |
81 } | |
82 | |
83 FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(cdm::Host* host) | |
84 : is_initialized_(false), | |
85 host_(host), | |
86 codec_context_(NULL), | |
87 av_frame_(NULL), | |
88 bits_per_channel_(0), | |
89 samples_per_second_(0), | |
90 channels_(0), | |
91 av_sample_format_(0), | |
92 bytes_per_frame_(0), | |
93 last_input_timestamp_(media::kNoTimestamp()), | |
94 output_bytes_to_drop_(0) { | |
95 } | |
96 | |
97 FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() { | |
98 ReleaseFFmpegResources(); | |
99 } | |
100 | |
101 bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) { | |
102 DVLOG(1) << "Initialize()"; | |
103 | |
104 if (!IsValidConfig(config)) { | |
105 LOG(ERROR) << "Initialize(): invalid audio decoder configuration."; | |
106 return false; | |
107 } | |
108 | |
109 if (is_initialized_) { | |
110 LOG(ERROR) << "Initialize(): Already initialized."; | |
111 return false; | |
112 } | |
113 | |
114 // Initialize AVCodecContext structure. | |
115 codec_context_ = avcodec_alloc_context3(NULL); | |
116 CdmAudioDecoderConfigToAVCodecContext(config, codec_context_); | |
117 | |
118 // MP3 decodes to S16P which we don't support, tell it to use S16 instead. | |
119 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) | |
120 codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16; | |
121 | |
122 AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id); | |
123 if (!codec || avcodec_open2(codec_context_, codec, NULL) < 0) { | |
124 DLOG(ERROR) << "Could not initialize audio decoder: " | |
125 << codec_context_->codec_id; | |
126 return false; | |
127 } | |
128 | |
129 // Ensure avcodec_open2() respected our format request. | |
130 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) { | |
131 DLOG(ERROR) << "Unable to configure a supported sample format: " | |
132 << codec_context_->sample_fmt; | |
133 return false; | |
134 } | |
135 | |
136 // Some codecs will only output float data, so we need to convert to integer | |
137 // before returning the decoded buffer. | |
138 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP || | |
139 codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) { | |
140 // Preallocate the AudioBus for float conversions. We can treat interleaved | |
141 // float data as a single planar channel since our output is expected in an | |
142 // interleaved format anyways. | |
143 int channels = codec_context_->channels; | |
144 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) | |
145 channels = 1; | |
146 converter_bus_ = media::AudioBus::CreateWrapper(channels); | |
147 } | |
148 | |
149 // Success! | |
150 av_frame_ = avcodec_alloc_frame(); | |
151 bits_per_channel_ = config.bits_per_channel; | |
152 samples_per_second_ = config.samples_per_second; | |
153 bytes_per_frame_ = codec_context_->channels * bits_per_channel_ / 8; | |
154 output_timestamp_helper_.reset( | |
155 new media::AudioTimestampHelper(config.samples_per_second)); | |
156 serialized_audio_frames_.reserve(bytes_per_frame_ * samples_per_second_); | |
157 is_initialized_ = true; | |
158 | |
159 // Store initial values to guard against midstream configuration changes. | |
160 channels_ = codec_context_->channels; | |
161 av_sample_format_ = codec_context_->sample_fmt; | |
162 | |
163 return true; | |
164 } | |
165 | |
166 void FFmpegCdmAudioDecoder::Deinitialize() { | |
167 DVLOG(1) << "Deinitialize()"; | |
168 ReleaseFFmpegResources(); | |
169 is_initialized_ = false; | |
170 ResetTimestampState(); | |
171 } | |
172 | |
173 void FFmpegCdmAudioDecoder::Reset() { | |
174 DVLOG(1) << "Reset()"; | |
175 avcodec_flush_buffers(codec_context_); | |
176 ResetTimestampState(); | |
177 } | |
178 | |
179 // static | |
180 bool FFmpegCdmAudioDecoder::IsValidConfig( | |
181 const cdm::AudioDecoderConfig& config) { | |
182 return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec && | |
183 config.channel_count > 0 && | |
184 config.channel_count <= kMaxChannels && | |
185 config.bits_per_channel > 0 && | |
186 config.bits_per_channel <= media::limits::kMaxBitsPerSample && | |
187 config.samples_per_second > 0 && | |
188 config.samples_per_second <= media::limits::kMaxSampleRate; | |
189 } | |
190 | |
191 cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer( | |
192 const uint8_t* compressed_buffer, | |
193 int32_t compressed_buffer_size, | |
194 int64_t input_timestamp, | |
195 cdm::AudioFrames* decoded_frames) { | |
196 DVLOG(1) << "DecodeBuffer()"; | |
197 const bool is_end_of_stream = !compressed_buffer; | |
198 base::TimeDelta timestamp = | |
199 base::TimeDelta::FromMicroseconds(input_timestamp); | |
200 | |
201 bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS; | |
202 if (!is_end_of_stream) { | |
203 if (last_input_timestamp_ == media::kNoTimestamp()) { | |
204 if (is_vorbis && timestamp < base::TimeDelta()) { | |
205 // Dropping frames for negative timestamps as outlined in section A.2 | |
206 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html | |
207 int frames_to_drop = floor( | |
208 0.5 + -timestamp.InSecondsF() * samples_per_second_); | |
209 output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop; | |
210 } else { | |
211 last_input_timestamp_ = timestamp; | |
212 } | |
213 } else if (timestamp != media::kNoTimestamp()) { | |
214 if (timestamp < last_input_timestamp_) { | |
215 base::TimeDelta diff = timestamp - last_input_timestamp_; | |
216 DVLOG(1) << "Input timestamps are not monotonically increasing! " | |
217 << " ts " << timestamp.InMicroseconds() << " us" | |
218 << " diff " << diff.InMicroseconds() << " us"; | |
219 return cdm::kDecodeError; | |
220 } | |
221 | |
222 last_input_timestamp_ = timestamp; | |
223 } | |
224 } | |
225 | |
226 AVPacket packet; | |
227 av_init_packet(&packet); | |
228 packet.data = const_cast<uint8_t*>(compressed_buffer); | |
229 packet.size = compressed_buffer_size; | |
230 | |
231 // Each audio packet may contain several frames, so we must call the decoder | |
232 // until we've exhausted the packet. Regardless of the packet size we always | |
233 // want to hand it to the decoder at least once, otherwise we would end up | |
234 // skipping end of stream packets since they have a size of zero. | |
235 do { | |
236 // Reset frame to default values. | |
237 avcodec_get_frame_defaults(av_frame_); | |
238 | |
239 int frame_decoded = 0; | |
240 int result = avcodec_decode_audio4( | |
241 codec_context_, av_frame_, &frame_decoded, &packet); | |
242 | |
243 if (result < 0) { | |
244 DCHECK(!is_end_of_stream) | |
245 << "End of stream buffer produced an error! " | |
246 << "This is quite possibly a bug in the audio decoder not handling " | |
247 << "end of stream AVPackets correctly."; | |
248 | |
249 DLOG(ERROR) | |
250 << "Error decoding an audio frame with timestamp: " | |
251 << timestamp.InMicroseconds() << " us, duration: " | |
252 << timestamp.InMicroseconds() << " us, packet size: " | |
253 << compressed_buffer_size << " bytes"; | |
254 | |
255 return cdm::kDecodeError; | |
256 } | |
257 | |
258 // Update packet size and data pointer in case we need to call the decoder | |
259 // with the remaining bytes from this packet. | |
260 packet.size -= result; | |
261 packet.data += result; | |
262 | |
263 if (output_timestamp_helper_->base_timestamp() == media::kNoTimestamp() && | |
264 !is_end_of_stream) { | |
265 DCHECK(timestamp != media::kNoTimestamp()); | |
266 if (output_bytes_to_drop_ > 0) { | |
267 // Currently Vorbis is the only codec that causes us to drop samples. | |
268 // If we have to drop samples it always means the timeline starts at 0. | |
269 DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS); | |
270 output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta()); | |
271 } else { | |
272 output_timestamp_helper_->SetBaseTimestamp(timestamp); | |
273 } | |
274 } | |
275 | |
276 int decoded_audio_size = 0; | |
277 if (frame_decoded) { | |
278 if (av_frame_->sample_rate != samples_per_second_ || | |
279 av_frame_->channels != channels_ || | |
280 av_frame_->format != av_sample_format_) { | |
281 DLOG(ERROR) << "Unsupported midstream configuration change!" | |
282 << " Sample Rate: " << av_frame_->sample_rate << " vs " | |
283 << samples_per_second_ | |
284 << ", Channels: " << av_frame_->channels << " vs " | |
285 << channels_ | |
286 << ", Sample Format: " << av_frame_->format << " vs " | |
287 << av_sample_format_; | |
288 return cdm::kDecodeError; | |
289 } | |
290 | |
291 decoded_audio_size = av_samples_get_buffer_size( | |
292 NULL, codec_context_->channels, av_frame_->nb_samples, | |
293 codec_context_->sample_fmt, 1); | |
294 // If we're decoding into float, adjust audio size. | |
295 if (converter_bus_ && bits_per_channel_ / 8 != sizeof(float)) { | |
296 DCHECK(codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT || | |
297 codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP); | |
298 decoded_audio_size *= | |
299 static_cast<float>(bits_per_channel_ / 8) / sizeof(float); | |
300 } | |
301 } | |
302 | |
303 int start_sample = 0; | |
304 if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) { | |
305 DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) | |
306 << "Decoder didn't output full frames"; | |
307 | |
308 int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_); | |
309 start_sample = dropped_size / bytes_per_frame_; | |
310 decoded_audio_size -= dropped_size; | |
311 output_bytes_to_drop_ -= dropped_size; | |
312 } | |
313 | |
314 scoped_refptr<media::DataBuffer> output; | |
315 if (decoded_audio_size > 0) { | |
316 DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) | |
317 << "Decoder didn't output full frames"; | |
318 | |
319 // Convert float data using an AudioBus. | |
320 if (converter_bus_) { | |
321 // Setup the AudioBus as a wrapper of the AVFrame data and then use | |
322 // AudioBus::ToInterleaved() to convert the data as necessary. | |
323 int skip_frames = start_sample; | |
324 int total_frames = av_frame_->nb_samples; | |
325 int frames_to_interleave = decoded_audio_size / bytes_per_frame_; | |
326 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) { | |
327 DCHECK_EQ(converter_bus_->channels(), 1); | |
328 total_frames *= codec_context_->channels; | |
329 skip_frames *= codec_context_->channels; | |
330 frames_to_interleave *= codec_context_->channels; | |
331 } | |
332 | |
333 converter_bus_->set_frames(total_frames); | |
334 for (int i = 0; i < converter_bus_->channels(); ++i) { | |
335 converter_bus_->SetChannelData(i, reinterpret_cast<float*>( | |
336 av_frame_->extended_data[i])); | |
337 } | |
338 | |
339 output = new media::DataBuffer(decoded_audio_size); | |
340 output->set_data_size(decoded_audio_size); | |
341 | |
342 DCHECK_EQ(frames_to_interleave, converter_bus_->frames() - skip_frames); | |
343 converter_bus_->ToInterleavedPartial( | |
344 skip_frames, frames_to_interleave, bits_per_channel_ / 8, | |
345 output->writable_data()); | |
346 } else { | |
347 output = media::DataBuffer::CopyFrom( | |
348 av_frame_->extended_data[0] + start_sample * bytes_per_frame_, | |
349 decoded_audio_size); | |
350 } | |
351 | |
352 base::TimeDelta output_timestamp = | |
353 output_timestamp_helper_->GetTimestamp(); | |
354 output_timestamp_helper_->AddFrames(decoded_audio_size / | |
355 bytes_per_frame_); | |
356 | |
357 // Serialize the audio samples into |serialized_audio_frames_|. | |
358 SerializeInt64(output_timestamp.InMicroseconds()); | |
359 SerializeInt64(output->data_size()); | |
360 serialized_audio_frames_.insert( | |
361 serialized_audio_frames_.end(), | |
362 output->data(), | |
363 output->data() + output->data_size()); | |
364 } | |
365 } while (packet.size > 0); | |
366 | |
367 if (!serialized_audio_frames_.empty()) { | |
368 decoded_frames->SetFrameBuffer( | |
369 host_->Allocate(serialized_audio_frames_.size())); | |
370 if (!decoded_frames->FrameBuffer()) { | |
371 LOG(ERROR) << "DecodeBuffer() cdm::Host::Allocate failed."; | |
372 return cdm::kDecodeError; | |
373 } | |
374 memcpy(decoded_frames->FrameBuffer()->Data(), | |
375 &serialized_audio_frames_[0], | |
376 serialized_audio_frames_.size()); | |
377 decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size()); | |
378 serialized_audio_frames_.clear(); | |
379 | |
380 return cdm::kSuccess; | |
381 } | |
382 | |
383 return cdm::kNeedMoreData; | |
384 } | |
385 | |
386 void FFmpegCdmAudioDecoder::ResetTimestampState() { | |
387 output_timestamp_helper_->SetBaseTimestamp(media::kNoTimestamp()); | |
388 last_input_timestamp_ = media::kNoTimestamp(); | |
389 output_bytes_to_drop_ = 0; | |
390 } | |
391 | |
392 void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() { | |
393 DVLOG(1) << "ReleaseFFmpegResources()"; | |
394 | |
395 if (codec_context_) { | |
396 av_free(codec_context_->extradata); | |
397 avcodec_close(codec_context_); | |
398 av_free(codec_context_); | |
399 codec_context_ = NULL; | |
400 } | |
401 if (av_frame_) { | |
402 av_free(av_frame_); | |
403 av_frame_ = NULL; | |
404 } | |
405 } | |
406 | |
407 void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) { | |
408 int previous_size = serialized_audio_frames_.size(); | |
409 serialized_audio_frames_.resize(previous_size + sizeof(value)); | |
410 memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value)); | |
411 } | |
412 | |
413 } // namespace webkit_media | |
OLD | NEW |