| OLD | NEW |
| (Empty) |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "webkit/renderer/media/crypto/ppapi/ffmpeg_cdm_audio_decoder.h" | |
| 6 | |
| 7 #include <algorithm> | |
| 8 | |
| 9 #include "base/logging.h" | |
| 10 #include "media/base/audio_bus.h" | |
| 11 #include "media/base/audio_timestamp_helper.h" | |
| 12 #include "media/base/buffers.h" | |
| 13 #include "media/base/data_buffer.h" | |
| 14 #include "media/base/limits.h" | |
| 15 #include "webkit/renderer/media/crypto/ppapi/cdm/content_decryption_module.h" | |
| 16 | |
| 17 // Include FFmpeg header files. | |
| 18 extern "C" { | |
| 19 // Temporarily disable possible loss of data warning. | |
| 20 MSVC_PUSH_DISABLE_WARNING(4244); | |
| 21 #include <libavcodec/avcodec.h> | |
| 22 MSVC_POP_WARNING(); | |
| 23 } // extern "C" | |
| 24 | |
| 25 namespace webkit_media { | |
| 26 | |
// Largest channel count for which src/media defines a channel layout.
// IsValidConfig() rejects configurations with more channels than this.
static const int kMaxChannels = 8;
| 29 | |
| 30 static AVCodecID CdmAudioCodecToCodecID( | |
| 31 cdm::AudioDecoderConfig::AudioCodec audio_codec) { | |
| 32 switch (audio_codec) { | |
| 33 case cdm::AudioDecoderConfig::kCodecVorbis: | |
| 34 return AV_CODEC_ID_VORBIS; | |
| 35 case cdm::AudioDecoderConfig::kCodecAac: | |
| 36 return AV_CODEC_ID_AAC; | |
| 37 case cdm::AudioDecoderConfig::kUnknownAudioCodec: | |
| 38 default: | |
| 39 NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec; | |
| 40 return AV_CODEC_ID_NONE; | |
| 41 } | |
| 42 } | |
| 43 | |
| 44 static void CdmAudioDecoderConfigToAVCodecContext( | |
| 45 const cdm::AudioDecoderConfig& config, | |
| 46 AVCodecContext* codec_context) { | |
| 47 codec_context->codec_type = AVMEDIA_TYPE_AUDIO; | |
| 48 codec_context->codec_id = CdmAudioCodecToCodecID(config.codec); | |
| 49 | |
| 50 switch (config.bits_per_channel) { | |
| 51 case 8: | |
| 52 codec_context->sample_fmt = AV_SAMPLE_FMT_U8; | |
| 53 break; | |
| 54 case 16: | |
| 55 codec_context->sample_fmt = AV_SAMPLE_FMT_S16; | |
| 56 break; | |
| 57 case 32: | |
| 58 codec_context->sample_fmt = AV_SAMPLE_FMT_S32; | |
| 59 break; | |
| 60 default: | |
| 61 DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits " | |
| 62 "per channel: " << config.bits_per_channel; | |
| 63 codec_context->sample_fmt = AV_SAMPLE_FMT_NONE; | |
| 64 } | |
| 65 | |
| 66 codec_context->channels = config.channel_count; | |
| 67 codec_context->sample_rate = config.samples_per_second; | |
| 68 | |
| 69 if (config.extra_data) { | |
| 70 codec_context->extradata_size = config.extra_data_size; | |
| 71 codec_context->extradata = reinterpret_cast<uint8_t*>( | |
| 72 av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE)); | |
| 73 memcpy(codec_context->extradata, config.extra_data, | |
| 74 config.extra_data_size); | |
| 75 memset(codec_context->extradata + config.extra_data_size, '\0', | |
| 76 FF_INPUT_BUFFER_PADDING_SIZE); | |
| 77 } else { | |
| 78 codec_context->extradata = NULL; | |
| 79 codec_context->extradata_size = 0; | |
| 80 } | |
| 81 } | |
| 82 | |
| 83 FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(cdm::Host* host) | |
| 84 : is_initialized_(false), | |
| 85 host_(host), | |
| 86 codec_context_(NULL), | |
| 87 av_frame_(NULL), | |
| 88 bits_per_channel_(0), | |
| 89 samples_per_second_(0), | |
| 90 channels_(0), | |
| 91 av_sample_format_(0), | |
| 92 bytes_per_frame_(0), | |
| 93 last_input_timestamp_(media::kNoTimestamp()), | |
| 94 output_bytes_to_drop_(0) { | |
| 95 } | |
| 96 | |
| 97 FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() { | |
| 98 ReleaseFFmpegResources(); | |
| 99 } | |
| 100 | |
| 101 bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) { | |
| 102 DVLOG(1) << "Initialize()"; | |
| 103 | |
| 104 if (!IsValidConfig(config)) { | |
| 105 LOG(ERROR) << "Initialize(): invalid audio decoder configuration."; | |
| 106 return false; | |
| 107 } | |
| 108 | |
| 109 if (is_initialized_) { | |
| 110 LOG(ERROR) << "Initialize(): Already initialized."; | |
| 111 return false; | |
| 112 } | |
| 113 | |
| 114 // Initialize AVCodecContext structure. | |
| 115 codec_context_ = avcodec_alloc_context3(NULL); | |
| 116 CdmAudioDecoderConfigToAVCodecContext(config, codec_context_); | |
| 117 | |
| 118 // MP3 decodes to S16P which we don't support, tell it to use S16 instead. | |
| 119 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) | |
| 120 codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16; | |
| 121 | |
| 122 AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id); | |
| 123 if (!codec || avcodec_open2(codec_context_, codec, NULL) < 0) { | |
| 124 DLOG(ERROR) << "Could not initialize audio decoder: " | |
| 125 << codec_context_->codec_id; | |
| 126 return false; | |
| 127 } | |
| 128 | |
| 129 // Ensure avcodec_open2() respected our format request. | |
| 130 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) { | |
| 131 DLOG(ERROR) << "Unable to configure a supported sample format: " | |
| 132 << codec_context_->sample_fmt; | |
| 133 return false; | |
| 134 } | |
| 135 | |
| 136 // Some codecs will only output float data, so we need to convert to integer | |
| 137 // before returning the decoded buffer. | |
| 138 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP || | |
| 139 codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) { | |
| 140 // Preallocate the AudioBus for float conversions. We can treat interleaved | |
| 141 // float data as a single planar channel since our output is expected in an | |
| 142 // interleaved format anyways. | |
| 143 int channels = codec_context_->channels; | |
| 144 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) | |
| 145 channels = 1; | |
| 146 converter_bus_ = media::AudioBus::CreateWrapper(channels); | |
| 147 } | |
| 148 | |
| 149 // Success! | |
| 150 av_frame_ = avcodec_alloc_frame(); | |
| 151 bits_per_channel_ = config.bits_per_channel; | |
| 152 samples_per_second_ = config.samples_per_second; | |
| 153 bytes_per_frame_ = codec_context_->channels * bits_per_channel_ / 8; | |
| 154 output_timestamp_helper_.reset( | |
| 155 new media::AudioTimestampHelper(config.samples_per_second)); | |
| 156 serialized_audio_frames_.reserve(bytes_per_frame_ * samples_per_second_); | |
| 157 is_initialized_ = true; | |
| 158 | |
| 159 // Store initial values to guard against midstream configuration changes. | |
| 160 channels_ = codec_context_->channels; | |
| 161 av_sample_format_ = codec_context_->sample_fmt; | |
| 162 | |
| 163 return true; | |
| 164 } | |
| 165 | |
| 166 void FFmpegCdmAudioDecoder::Deinitialize() { | |
| 167 DVLOG(1) << "Deinitialize()"; | |
| 168 ReleaseFFmpegResources(); | |
| 169 is_initialized_ = false; | |
| 170 ResetTimestampState(); | |
| 171 } | |
| 172 | |
| 173 void FFmpegCdmAudioDecoder::Reset() { | |
| 174 DVLOG(1) << "Reset()"; | |
| 175 avcodec_flush_buffers(codec_context_); | |
| 176 ResetTimestampState(); | |
| 177 } | |
| 178 | |
| 179 // static | |
| 180 bool FFmpegCdmAudioDecoder::IsValidConfig( | |
| 181 const cdm::AudioDecoderConfig& config) { | |
| 182 return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec && | |
| 183 config.channel_count > 0 && | |
| 184 config.channel_count <= kMaxChannels && | |
| 185 config.bits_per_channel > 0 && | |
| 186 config.bits_per_channel <= media::limits::kMaxBitsPerSample && | |
| 187 config.samples_per_second > 0 && | |
| 188 config.samples_per_second <= media::limits::kMaxSampleRate; | |
| 189 } | |
| 190 | |
| 191 cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer( | |
| 192 const uint8_t* compressed_buffer, | |
| 193 int32_t compressed_buffer_size, | |
| 194 int64_t input_timestamp, | |
| 195 cdm::AudioFrames* decoded_frames) { | |
| 196 DVLOG(1) << "DecodeBuffer()"; | |
| 197 const bool is_end_of_stream = !compressed_buffer; | |
| 198 base::TimeDelta timestamp = | |
| 199 base::TimeDelta::FromMicroseconds(input_timestamp); | |
| 200 | |
| 201 bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS; | |
| 202 if (!is_end_of_stream) { | |
| 203 if (last_input_timestamp_ == media::kNoTimestamp()) { | |
| 204 if (is_vorbis && timestamp < base::TimeDelta()) { | |
| 205 // Dropping frames for negative timestamps as outlined in section A.2 | |
| 206 // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html | |
| 207 int frames_to_drop = floor( | |
| 208 0.5 + -timestamp.InSecondsF() * samples_per_second_); | |
| 209 output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop; | |
| 210 } else { | |
| 211 last_input_timestamp_ = timestamp; | |
| 212 } | |
| 213 } else if (timestamp != media::kNoTimestamp()) { | |
| 214 if (timestamp < last_input_timestamp_) { | |
| 215 base::TimeDelta diff = timestamp - last_input_timestamp_; | |
| 216 DVLOG(1) << "Input timestamps are not monotonically increasing! " | |
| 217 << " ts " << timestamp.InMicroseconds() << " us" | |
| 218 << " diff " << diff.InMicroseconds() << " us"; | |
| 219 return cdm::kDecodeError; | |
| 220 } | |
| 221 | |
| 222 last_input_timestamp_ = timestamp; | |
| 223 } | |
| 224 } | |
| 225 | |
| 226 AVPacket packet; | |
| 227 av_init_packet(&packet); | |
| 228 packet.data = const_cast<uint8_t*>(compressed_buffer); | |
| 229 packet.size = compressed_buffer_size; | |
| 230 | |
| 231 // Each audio packet may contain several frames, so we must call the decoder | |
| 232 // until we've exhausted the packet. Regardless of the packet size we always | |
| 233 // want to hand it to the decoder at least once, otherwise we would end up | |
| 234 // skipping end of stream packets since they have a size of zero. | |
| 235 do { | |
| 236 // Reset frame to default values. | |
| 237 avcodec_get_frame_defaults(av_frame_); | |
| 238 | |
| 239 int frame_decoded = 0; | |
| 240 int result = avcodec_decode_audio4( | |
| 241 codec_context_, av_frame_, &frame_decoded, &packet); | |
| 242 | |
| 243 if (result < 0) { | |
| 244 DCHECK(!is_end_of_stream) | |
| 245 << "End of stream buffer produced an error! " | |
| 246 << "This is quite possibly a bug in the audio decoder not handling " | |
| 247 << "end of stream AVPackets correctly."; | |
| 248 | |
| 249 DLOG(ERROR) | |
| 250 << "Error decoding an audio frame with timestamp: " | |
| 251 << timestamp.InMicroseconds() << " us, duration: " | |
| 252 << timestamp.InMicroseconds() << " us, packet size: " | |
| 253 << compressed_buffer_size << " bytes"; | |
| 254 | |
| 255 return cdm::kDecodeError; | |
| 256 } | |
| 257 | |
| 258 // Update packet size and data pointer in case we need to call the decoder | |
| 259 // with the remaining bytes from this packet. | |
| 260 packet.size -= result; | |
| 261 packet.data += result; | |
| 262 | |
| 263 if (output_timestamp_helper_->base_timestamp() == media::kNoTimestamp() && | |
| 264 !is_end_of_stream) { | |
| 265 DCHECK(timestamp != media::kNoTimestamp()); | |
| 266 if (output_bytes_to_drop_ > 0) { | |
| 267 // Currently Vorbis is the only codec that causes us to drop samples. | |
| 268 // If we have to drop samples it always means the timeline starts at 0. | |
| 269 DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS); | |
| 270 output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta()); | |
| 271 } else { | |
| 272 output_timestamp_helper_->SetBaseTimestamp(timestamp); | |
| 273 } | |
| 274 } | |
| 275 | |
| 276 int decoded_audio_size = 0; | |
| 277 if (frame_decoded) { | |
| 278 if (av_frame_->sample_rate != samples_per_second_ || | |
| 279 av_frame_->channels != channels_ || | |
| 280 av_frame_->format != av_sample_format_) { | |
| 281 DLOG(ERROR) << "Unsupported midstream configuration change!" | |
| 282 << " Sample Rate: " << av_frame_->sample_rate << " vs " | |
| 283 << samples_per_second_ | |
| 284 << ", Channels: " << av_frame_->channels << " vs " | |
| 285 << channels_ | |
| 286 << ", Sample Format: " << av_frame_->format << " vs " | |
| 287 << av_sample_format_; | |
| 288 return cdm::kDecodeError; | |
| 289 } | |
| 290 | |
| 291 decoded_audio_size = av_samples_get_buffer_size( | |
| 292 NULL, codec_context_->channels, av_frame_->nb_samples, | |
| 293 codec_context_->sample_fmt, 1); | |
| 294 // If we're decoding into float, adjust audio size. | |
| 295 if (converter_bus_ && bits_per_channel_ / 8 != sizeof(float)) { | |
| 296 DCHECK(codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT || | |
| 297 codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP); | |
| 298 decoded_audio_size *= | |
| 299 static_cast<float>(bits_per_channel_ / 8) / sizeof(float); | |
| 300 } | |
| 301 } | |
| 302 | |
| 303 int start_sample = 0; | |
| 304 if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) { | |
| 305 DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) | |
| 306 << "Decoder didn't output full frames"; | |
| 307 | |
| 308 int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_); | |
| 309 start_sample = dropped_size / bytes_per_frame_; | |
| 310 decoded_audio_size -= dropped_size; | |
| 311 output_bytes_to_drop_ -= dropped_size; | |
| 312 } | |
| 313 | |
| 314 scoped_refptr<media::DataBuffer> output; | |
| 315 if (decoded_audio_size > 0) { | |
| 316 DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0) | |
| 317 << "Decoder didn't output full frames"; | |
| 318 | |
| 319 // Convert float data using an AudioBus. | |
| 320 if (converter_bus_) { | |
| 321 // Setup the AudioBus as a wrapper of the AVFrame data and then use | |
| 322 // AudioBus::ToInterleaved() to convert the data as necessary. | |
| 323 int skip_frames = start_sample; | |
| 324 int total_frames = av_frame_->nb_samples; | |
| 325 int frames_to_interleave = decoded_audio_size / bytes_per_frame_; | |
| 326 if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) { | |
| 327 DCHECK_EQ(converter_bus_->channels(), 1); | |
| 328 total_frames *= codec_context_->channels; | |
| 329 skip_frames *= codec_context_->channels; | |
| 330 frames_to_interleave *= codec_context_->channels; | |
| 331 } | |
| 332 | |
| 333 converter_bus_->set_frames(total_frames); | |
| 334 for (int i = 0; i < converter_bus_->channels(); ++i) { | |
| 335 converter_bus_->SetChannelData(i, reinterpret_cast<float*>( | |
| 336 av_frame_->extended_data[i])); | |
| 337 } | |
| 338 | |
| 339 output = new media::DataBuffer(decoded_audio_size); | |
| 340 output->set_data_size(decoded_audio_size); | |
| 341 | |
| 342 DCHECK_EQ(frames_to_interleave, converter_bus_->frames() - skip_frames); | |
| 343 converter_bus_->ToInterleavedPartial( | |
| 344 skip_frames, frames_to_interleave, bits_per_channel_ / 8, | |
| 345 output->writable_data()); | |
| 346 } else { | |
| 347 output = media::DataBuffer::CopyFrom( | |
| 348 av_frame_->extended_data[0] + start_sample * bytes_per_frame_, | |
| 349 decoded_audio_size); | |
| 350 } | |
| 351 | |
| 352 base::TimeDelta output_timestamp = | |
| 353 output_timestamp_helper_->GetTimestamp(); | |
| 354 output_timestamp_helper_->AddFrames(decoded_audio_size / | |
| 355 bytes_per_frame_); | |
| 356 | |
| 357 // Serialize the audio samples into |serialized_audio_frames_|. | |
| 358 SerializeInt64(output_timestamp.InMicroseconds()); | |
| 359 SerializeInt64(output->data_size()); | |
| 360 serialized_audio_frames_.insert( | |
| 361 serialized_audio_frames_.end(), | |
| 362 output->data(), | |
| 363 output->data() + output->data_size()); | |
| 364 } | |
| 365 } while (packet.size > 0); | |
| 366 | |
| 367 if (!serialized_audio_frames_.empty()) { | |
| 368 decoded_frames->SetFrameBuffer( | |
| 369 host_->Allocate(serialized_audio_frames_.size())); | |
| 370 if (!decoded_frames->FrameBuffer()) { | |
| 371 LOG(ERROR) << "DecodeBuffer() cdm::Host::Allocate failed."; | |
| 372 return cdm::kDecodeError; | |
| 373 } | |
| 374 memcpy(decoded_frames->FrameBuffer()->Data(), | |
| 375 &serialized_audio_frames_[0], | |
| 376 serialized_audio_frames_.size()); | |
| 377 decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size()); | |
| 378 serialized_audio_frames_.clear(); | |
| 379 | |
| 380 return cdm::kSuccess; | |
| 381 } | |
| 382 | |
| 383 return cdm::kNeedMoreData; | |
| 384 } | |
| 385 | |
| 386 void FFmpegCdmAudioDecoder::ResetTimestampState() { | |
| 387 output_timestamp_helper_->SetBaseTimestamp(media::kNoTimestamp()); | |
| 388 last_input_timestamp_ = media::kNoTimestamp(); | |
| 389 output_bytes_to_drop_ = 0; | |
| 390 } | |
| 391 | |
| 392 void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() { | |
| 393 DVLOG(1) << "ReleaseFFmpegResources()"; | |
| 394 | |
| 395 if (codec_context_) { | |
| 396 av_free(codec_context_->extradata); | |
| 397 avcodec_close(codec_context_); | |
| 398 av_free(codec_context_); | |
| 399 codec_context_ = NULL; | |
| 400 } | |
| 401 if (av_frame_) { | |
| 402 av_free(av_frame_); | |
| 403 av_frame_ = NULL; | |
| 404 } | |
| 405 } | |
| 406 | |
| 407 void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) { | |
| 408 int previous_size = serialized_audio_frames_.size(); | |
| 409 serialized_audio_frames_.resize(previous_size + sizeof(value)); | |
| 410 memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value)); | |
| 411 } | |
| 412 | |
| 413 } // namespace webkit_media | |
| OLD | NEW |