Chromium Code Reviews

Side by Side Diff: media/filters/ffmpeg_demuxer.cc

Issue 23702007: Render inband text tracks in the media pipeline (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: incorporate aaron's comments (10/16) Created 7 years, 2 months ago
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "media/filters/ffmpeg_demuxer.h" 5 #include "media/filters/ffmpeg_demuxer.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <string> 8 #include <string>
9 9
10 #include "base/base64.h" 10 #include "base/base64.h"
11 #include "base/bind.h" 11 #include "base/bind.h"
12 #include "base/callback.h" 12 #include "base/callback.h"
13 #include "base/callback_helpers.h" 13 #include "base/callback_helpers.h"
14 #include "base/command_line.h"
15 #include "base/memory/scoped_ptr.h" 14 #include "base/memory/scoped_ptr.h"
16 #include "base/message_loop/message_loop.h" 15 #include "base/message_loop/message_loop.h"
17 #include "base/metrics/sparse_histogram.h" 16 #include "base/metrics/sparse_histogram.h"
18 #include "base/stl_util.h"
19 #include "base/strings/string_util.h" 17 #include "base/strings/string_util.h"
20 #include "base/strings/stringprintf.h" 18 #include "base/strings/stringprintf.h"
21 #include "base/task_runner_util.h" 19 #include "base/task_runner_util.h"
22 #include "base/time/time.h" 20 #include "base/time/time.h"
23 #include "media/base/audio_decoder_config.h" 21 #include "media/base/audio_decoder_config.h"
24 #include "media/base/bind_to_loop.h" 22 #include "media/base/bind_to_loop.h"
25 #include "media/base/decoder_buffer.h" 23 #include "media/base/decoder_buffer.h"
26 #include "media/base/decrypt_config.h" 24 #include "media/base/decrypt_config.h"
27 #include "media/base/limits.h" 25 #include "media/base/limits.h"
28 #include "media/base/media_log.h" 26 #include "media/base/media_log.h"
29 #include "media/base/media_switches.h"
30 #include "media/base/video_decoder_config.h" 27 #include "media/base/video_decoder_config.h"
31 #include "media/ffmpeg/ffmpeg_common.h" 28 #include "media/ffmpeg/ffmpeg_common.h"
32 #include "media/filters/ffmpeg_glue.h" 29 #include "media/filters/ffmpeg_glue.h"
33 #include "media/filters/ffmpeg_h264_to_annex_b_bitstream_converter.h" 30 #include "media/filters/ffmpeg_h264_to_annex_b_bitstream_converter.h"
34 #include "media/webm/webm_crypto_helpers.h" 31 #include "media/webm/webm_crypto_helpers.h"
35 32
36 namespace media { 33 namespace media {
37 34
38 // 35 //
39 // FFmpegDemuxerStream 36 // FFmpegDemuxerStream
(...skipping 17 matching lines...)
57 case AVMEDIA_TYPE_AUDIO: 54 case AVMEDIA_TYPE_AUDIO:
58 type_ = AUDIO; 55 type_ = AUDIO;
59 AVStreamToAudioDecoderConfig(stream, &audio_config_, true); 56 AVStreamToAudioDecoderConfig(stream, &audio_config_, true);
60 is_encrypted = audio_config_.is_encrypted(); 57 is_encrypted = audio_config_.is_encrypted();
61 break; 58 break;
62 case AVMEDIA_TYPE_VIDEO: 59 case AVMEDIA_TYPE_VIDEO:
63 type_ = VIDEO; 60 type_ = VIDEO;
64 AVStreamToVideoDecoderConfig(stream, &video_config_, true); 61 AVStreamToVideoDecoderConfig(stream, &video_config_, true);
65 is_encrypted = video_config_.is_encrypted(); 62 is_encrypted = video_config_.is_encrypted();
66 break; 63 break;
64 case AVMEDIA_TYPE_SUBTITLE:
65 type_ = TEXT;
66 break;
67 default: 67 default:
68 NOTREACHED(); 68 NOTREACHED();
69 break; 69 break;
70 } 70 }
71 71
72 // Calculate the duration. 72 // Calculate the duration.
73 duration_ = ConvertStreamTimestamp(stream->time_base, stream->duration); 73 duration_ = ConvertStreamTimestamp(stream->time_base, stream->duration);
74 74
75 if (stream_->codec->codec_id == AV_CODEC_ID_H264) { 75 if (stream_->codec->codec_id == AV_CODEC_ID_H264) {
76 bitstream_converter_.reset( 76 bitstream_converter_.reset(
(...skipping 26 matching lines...)
103 NOTREACHED() << "Attempted to enqueue packet on a stopped stream"; 103 NOTREACHED() << "Attempted to enqueue packet on a stopped stream";
104 return; 104 return;
105 } 105 }
106 106
107 // Convert the packet if there is a bitstream filter. 107 // Convert the packet if there is a bitstream filter.
108 if (packet->data && bitstream_converter_enabled_ && 108 if (packet->data && bitstream_converter_enabled_ &&
109 !bitstream_converter_->ConvertPacket(packet.get())) { 109 !bitstream_converter_->ConvertPacket(packet.get())) {
110 LOG(ERROR) << "Format conversion failed."; 110 LOG(ERROR) << "Format conversion failed.";
111 } 111 }
112 112
113 // Get side data if any. For now, the only type of side_data is VP8 Alpha. We 113 scoped_refptr<DecoderBuffer> buffer;
114 // keep this generic so that other side_data types in the future can be 114
115 // handled the same way as well. 115 // Get side data if any. For now, the only types of side_data are VP8 Alpha,
116 // and WebVTT id and settings. We keep this generic so that other side_data
117 // types in the future can be handled the same way as well.
116 av_packet_split_side_data(packet.get()); 118 av_packet_split_side_data(packet.get());
117 int side_data_size = 0; 119 if (type() == DemuxerStream::TEXT) {
118 uint8* side_data = av_packet_get_side_data( 120 int id_size = 0;
119 packet.get(), 121 uint8* id_data = av_packet_get_side_data(
120 AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL, 122 packet.get(),
121 &side_data_size); 123 AV_PKT_DATA_WEBVTT_IDENTIFIER,
124 &id_size);
122 125
123 // If a packet is returned by FFmpeg's av_parser_parse2() the packet will 126 int settings_size = 0;
124 // reference inner memory of FFmpeg. As such we should transfer the packet 127 uint8* settings_data = av_packet_get_side_data(
125 // into memory we control. 128 packet.get(),
126 scoped_refptr<DecoderBuffer> buffer; 129 AV_PKT_DATA_WEBVTT_SETTINGS,
127 if (side_data_size > 0) { 130 &settings_size);
131
132 // The DecoderBuffer only supports a single side data item. In the case of
133 // a WebVTT cue, we can have potentially two side data items. In order to
134 // avoid disrupting DecoderBuffer any more than we need to, we copy both
135 // side data items onto a single one, and terminate each with a NUL marker.
136 std::vector<uint8> side_data;
137 side_data.reserve(id_size + 1 + settings_size + 1);
138 side_data.insert(side_data.end(),
139 id_data, id_data + id_size);
140 side_data.push_back(0);
141 side_data.insert(side_data.end(),
142 settings_data, settings_data + settings_size);
143 side_data.push_back(0);
144
128 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size, 145 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size,
129 side_data, side_data_size); 146 side_data.data(), side_data.size());
130 } else { 147 } else {
131 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size); 148 int side_data_size = 0;
149 uint8* side_data = av_packet_get_side_data(
150 packet.get(),
151 AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,
152 &side_data_size);
153
154 // If a packet is returned by FFmpeg's av_parser_parse2() the packet will
155 // reference inner memory of FFmpeg. As such we should transfer the packet
156 // into memory we control.
157 if (side_data_size > 0) {
158 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size,
159 side_data, side_data_size);
160 } else {
161 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size);
162 }
132 } 163 }
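
The comment above notes that DecoderBuffer carries only a single side-data blob, so the WebVTT cue identifier and settings are concatenated into one buffer, each terminated by a NUL byte. A minimal sketch of how a consumer could split that blob back apart, assuming exactly the layout produced above (id bytes, '\0', settings bytes, '\0'); the helper name is illustrative and not part of this patch. <algorithm> and <string> are already included at the top of this file, and uint8 is Chromium's typedef for unsigned char.

// Illustrative only, not part of this patch: splitting the combined side
// data written above back into the WebVTT cue id and settings strings.
static void SplitWebVTTSideData(const uint8* side_data,
                                int side_data_size,
                                std::string* id,
                                std::string* settings) {
  const uint8* end = side_data + side_data_size;
  // The id runs up to the first NUL marker.
  const uint8* first_nul = std::find(side_data, end, 0);
  id->assign(side_data, first_nul);
  // The settings run from just past that NUL to the next one.
  const uint8* settings_begin = (first_nul == end) ? end : first_nul + 1;
  const uint8* second_nul = std::find(settings_begin, end, 0);
  settings->assign(settings_begin, second_nul);
}
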
133 164
134 if ((type() == DemuxerStream::AUDIO && audio_config_.is_encrypted()) || 165 if ((type() == DemuxerStream::AUDIO && audio_config_.is_encrypted()) ||
135 (type() == DemuxerStream::VIDEO && video_config_.is_encrypted())) { 166 (type() == DemuxerStream::VIDEO && video_config_.is_encrypted())) {
136 scoped_ptr<DecryptConfig> config(WebMCreateDecryptConfig( 167 scoped_ptr<DecryptConfig> config(WebMCreateDecryptConfig(
137 packet->data, packet->size, 168 packet->data, packet->size,
138 reinterpret_cast<const uint8*>(encryption_key_id_.data()), 169 reinterpret_cast<const uint8*>(encryption_key_id_.data()),
139 encryption_key_id_.size())); 170 encryption_key_id_.size()));
140 if (!config) 171 if (!config)
141 LOG(ERROR) << "Creation of DecryptConfig failed."; 172 LOG(ERROR) << "Creation of DecryptConfig failed.";
142 buffer->set_decrypt_config(config.Pass()); 173 buffer->set_decrypt_config(config.Pass());
143 } 174 }
144 175
145 buffer->set_timestamp(ConvertStreamTimestamp( 176 buffer->set_timestamp(ConvertStreamTimestamp(
146 stream_->time_base, packet->pts)); 177 stream_->time_base, packet->pts));
147 buffer->set_duration(ConvertStreamTimestamp( 178 buffer->set_duration(ConvertStreamTimestamp(
(...skipping 74 matching lines...)
222 CHECK_EQ(type_, AUDIO); 253 CHECK_EQ(type_, AUDIO);
223 return audio_config_; 254 return audio_config_;
224 } 255 }
225 256
226 VideoDecoderConfig FFmpegDemuxerStream::video_decoder_config() { 257 VideoDecoderConfig FFmpegDemuxerStream::video_decoder_config() {
227 DCHECK(message_loop_->BelongsToCurrentThread()); 258 DCHECK(message_loop_->BelongsToCurrentThread());
228 CHECK_EQ(type_, VIDEO); 259 CHECK_EQ(type_, VIDEO);
229 return video_config_; 260 return video_config_;
230 } 261 }
231 262
263 TextTrackConfig FFmpegDemuxerStream::text_track_config() {
264 DCHECK(message_loop_->BelongsToCurrentThread());
265 CHECK_EQ(type_, TEXT);
266 return TextTrackConfig(GetTextKind(),
267 GetMetadata("title"),
268 GetMetadata("language"));
269 }
270
232 FFmpegDemuxerStream::~FFmpegDemuxerStream() { 271 FFmpegDemuxerStream::~FFmpegDemuxerStream() {
233 DCHECK(!demuxer_); 272 DCHECK(!demuxer_);
234 DCHECK(read_cb_.is_null()); 273 DCHECK(read_cb_.is_null());
235 DCHECK(buffer_queue_.IsEmpty()); 274 DCHECK(buffer_queue_.IsEmpty());
236 } 275 }
237 276
238 base::TimeDelta FFmpegDemuxerStream::GetElapsedTime() const { 277 base::TimeDelta FFmpegDemuxerStream::GetElapsedTime() const {
239 return ConvertStreamTimestamp(stream_->time_base, stream_->cur_dts); 278 return ConvertStreamTimestamp(stream_->time_base, stream_->cur_dts);
240 } 279 }
241 280
(...skipping 23 matching lines...)
265 // TODO(scherkus): Remove early return and reenable time-based capacity 304 // TODO(scherkus): Remove early return and reenable time-based capacity
266 // after our data sources support canceling/concurrent reads, see 305 // after our data sources support canceling/concurrent reads, see
267 // http://crbug.com/165762 for details. 306 // http://crbug.com/165762 for details.
268 return !read_cb_.is_null(); 307 return !read_cb_.is_null();
269 308
270 // Try to have one second's worth of encoded data per stream. 309 // Try to have one second's worth of encoded data per stream.
271 const base::TimeDelta kCapacity = base::TimeDelta::FromSeconds(1); 310 const base::TimeDelta kCapacity = base::TimeDelta::FromSeconds(1);
272 return buffer_queue_.IsEmpty() || buffer_queue_.Duration() < kCapacity; 311 return buffer_queue_.IsEmpty() || buffer_queue_.Duration() < kCapacity;
273 } 312 }
274 313
314 TextKind FFmpegDemuxerStream::GetTextKind() const {
315 DCHECK_EQ(type_, DemuxerStream::TEXT);
316
317 if (stream_->disposition & AV_DISPOSITION_CAPTIONS)
318 return kTextCaptions;
319
320 if (stream_->disposition & AV_DISPOSITION_DESCRIPTIONS)
321 return kTextDescriptions;
322
323 if (stream_->disposition & AV_DISPOSITION_METADATA)
324 return kTextMetadata;
325
326 return kTextSubtitles;
327 }
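
For orientation, the four TextKind values chosen above line up with the kind values defined for HTML <track> elements. A hedged sketch of that correspondence; the helper below is an illustration and is not part of this change.

// Assumption for illustration, not code from this patch: TextKind values
// map onto the HTML <track> "kind" attribute strings.
static const char* TextKindToTrackKind(TextKind kind) {
  switch (kind) {
    case kTextSubtitles:    return "subtitles";
    case kTextCaptions:     return "captions";
    case kTextDescriptions: return "descriptions";
    case kTextMetadata:     return "metadata";
    default:                return "subtitles";  // Fallback for other values.
  }
}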
328
329 std::string FFmpegDemuxerStream::GetMetadata(const char* key) const {
330 const AVDictionaryEntry* entry =
331 av_dict_get(stream_->metadata, key, NULL, 0);
332 return (entry == NULL || entry->value == NULL) ? "" : entry->value;
333 }
334
275 // static 335 // static
276 base::TimeDelta FFmpegDemuxerStream::ConvertStreamTimestamp( 336 base::TimeDelta FFmpegDemuxerStream::ConvertStreamTimestamp(
277 const AVRational& time_base, int64 timestamp) { 337 const AVRational& time_base, int64 timestamp) {
278 if (timestamp == static_cast<int64>(AV_NOPTS_VALUE)) 338 if (timestamp == static_cast<int64>(AV_NOPTS_VALUE))
279 return kNoTimestamp(); 339 return kNoTimestamp();
280 340
281 return ConvertFromTimeBase(time_base, timestamp); 341 return ConvertFromTimeBase(time_base, timestamp);
282 } 342 }
283 343
284 // 344 //
285 // FFmpegDemuxer 345 // FFmpegDemuxer
286 // 346 //
287 FFmpegDemuxer::FFmpegDemuxer( 347 FFmpegDemuxer::FFmpegDemuxer(
288 const scoped_refptr<base::MessageLoopProxy>& message_loop, 348 const scoped_refptr<base::MessageLoopProxy>& message_loop,
289 DataSource* data_source, 349 DataSource* data_source,
290 const NeedKeyCB& need_key_cb, 350 const NeedKeyCB& need_key_cb,
351 bool text_enabled,
291 const scoped_refptr<MediaLog>& media_log) 352 const scoped_refptr<MediaLog>& media_log)
292 : host_(NULL), 353 : host_(NULL),
293 message_loop_(message_loop), 354 message_loop_(message_loop),
294 weak_factory_(this), 355 weak_factory_(this),
295 blocking_thread_("FFmpegDemuxer"), 356 blocking_thread_("FFmpegDemuxer"),
296 pending_read_(false), 357 pending_read_(false),
297 pending_seek_(false), 358 pending_seek_(false),
298 data_source_(data_source), 359 data_source_(data_source),
299 media_log_(media_log), 360 media_log_(media_log),
300 bitrate_(0), 361 bitrate_(0),
301 start_time_(kNoTimestamp()), 362 start_time_(kNoTimestamp()),
302 audio_disabled_(false), 363 audio_disabled_(false),
364 text_enabled_(text_enabled),
303 duration_known_(false), 365 duration_known_(false),
304 url_protocol_(data_source, BindToLoop(message_loop_, base::Bind( 366 url_protocol_(data_source, BindToLoop(message_loop_, base::Bind(
305 &FFmpegDemuxer::OnDataSourceError, base::Unretained(this)))), 367 &FFmpegDemuxer::OnDataSourceError, base::Unretained(this)))),
306 need_key_cb_(need_key_cb) { 368 need_key_cb_(need_key_cb) {
307 DCHECK(message_loop_.get()); 369 DCHECK(message_loop_.get());
308 DCHECK(data_source_); 370 DCHECK(data_source_);
309 } 371 }
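
With the base/command_line.h and media/base/media_switches.h includes removed, the demuxer no longer consults a switch itself; the caller now decides via the new text_enabled argument. A hypothetical call site, purely to show where that argument slots in; the variable names are placeholders for whatever the embedding pipeline already has in hand.

// Hypothetical call site, not part of this patch.
scoped_ptr<FFmpegDemuxer> demuxer(new FFmpegDemuxer(
    media_message_loop,   // scoped_refptr<base::MessageLoopProxy>
    data_source,          // DataSource*
    need_key_cb,          // NeedKeyCB
    true,                 // text_enabled: expose subtitle streams
    media_log));          // scoped_refptr<MediaLog>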
310 372
311 FFmpegDemuxer::~FFmpegDemuxer() {} 373 FFmpegDemuxer::~FFmpegDemuxer() {}
312 374
(...skipping 84 matching lines...)
397 } 459 }
398 } 460 }
399 return NULL; 461 return NULL;
400 } 462 }
401 463
402 base::TimeDelta FFmpegDemuxer::GetStartTime() const { 464 base::TimeDelta FFmpegDemuxer::GetStartTime() const {
403 DCHECK(message_loop_->BelongsToCurrentThread()); 465 DCHECK(message_loop_->BelongsToCurrentThread());
404 return start_time_; 466 return start_time_;
405 } 467 }
406 468
469 void FFmpegDemuxer::AddTextStreams() {
470 DCHECK(message_loop_->BelongsToCurrentThread());
471
472 for (StreamVector::size_type idx = 0; idx < streams_.size(); ++idx) {
473 FFmpegDemuxerStream* stream = streams_[idx];
474 if (stream == NULL || stream->type() != DemuxerStream::TEXT)
475 continue;
476
477 TextKind kind = stream->GetTextKind();
478 std::string title = stream->GetMetadata("title");
479 std::string language = stream->GetMetadata("language");
480
481 host_->AddTextStream(stream, kind, title, language);
482 }
483 }
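
AddTextStreams() hands each subtitle stream that survived initialization to the host through AddTextStream(stream, kind, title, language). Purely as orientation, a rough sketch of what a host-side override could look like, under the assumption that the host interface mirrors that call exactly; the class and member names are placeholders, not Chromium code.

// Rough sketch only; DemuxerHost details beyond the call above are assumed.
void MyDemuxerHost::AddTextStream(DemuxerStream* text_stream,
                                  TextKind kind,
                                  const std::string& title,
                                  const std::string& language) {
  // Remember the inband track; a real host would hand it to whichever
  // component renders text cues.
  text_streams_.push_back(text_stream);
  DVLOG(1) << "inband text track: kind=" << kind << " title=" << title
           << " language=" << language;
}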
484
407 // Helper for calculating the bitrate of the media based on information stored 485 // Helper for calculating the bitrate of the media based on information stored
408 // in |format_context| or failing that the size and duration of the media. 486 // in |format_context| or failing that the size and duration of the media.
409 // 487 //
410 // Returns 0 if a bitrate could not be determined. 488 // Returns 0 if a bitrate could not be determined.
411 static int CalculateBitrate( 489 static int CalculateBitrate(
412 AVFormatContext* format_context, 490 AVFormatContext* format_context,
413 const base::TimeDelta& duration, 491 const base::TimeDelta& duration,
414 int64 filesize_in_bytes) { 492 int64 filesize_in_bytes) {
415 // If there is a bitrate set on the container, use it. 493 // If there is a bitrate set on the container, use it.
416 if (format_context->bit_rate > 0) 494 if (format_context->bit_rate > 0)
(...skipping 98 matching lines...)
515 // Log the codec detected, whether it is supported or not. 593 // Log the codec detected, whether it is supported or not.
516 UMA_HISTOGRAM_SPARSE_SLOWLY("Media.DetectedVideoCodec", 594 UMA_HISTOGRAM_SPARSE_SLOWLY("Media.DetectedVideoCodec",
517 codec_context->codec_id); 595 codec_context->codec_id);
518 // Ensure the codec is supported. IsValidConfig() also checks that the 596 // Ensure the codec is supported. IsValidConfig() also checks that the
519 // frame size and visible size are valid. 597 // frame size and visible size are valid.
520 AVStreamToVideoDecoderConfig(stream, &video_config, false); 598 AVStreamToVideoDecoderConfig(stream, &video_config, false);
521 599
522 if (!video_config.IsValidConfig()) 600 if (!video_config.IsValidConfig())
523 continue; 601 continue;
524 video_stream = stream; 602 video_stream = stream;
603 } else if (codec_type == AVMEDIA_TYPE_SUBTITLE) {
604 if (codec_context->codec_id != AV_CODEC_ID_WEBVTT || !text_enabled_) {
605 continue;
606 }
525 } else { 607 } else {
526 continue; 608 continue;
527 } 609 }
528 610
529 streams_[i] = new FFmpegDemuxerStream(this, stream); 611 streams_[i] = new FFmpegDemuxerStream(this, stream);
530 max_duration = std::max(max_duration, streams_[i]->duration()); 612 max_duration = std::max(max_duration, streams_[i]->duration());
531 613
532 if (stream->first_dts != static_cast<int64_t>(AV_NOPTS_VALUE)) { 614 if (stream->first_dts != static_cast<int64_t>(AV_NOPTS_VALUE)) {
533 const base::TimeDelta first_dts = ConvertFromTimeBase( 615 const base::TimeDelta first_dts = ConvertFromTimeBase(
534 stream->time_base, stream->first_dts); 616 stream->time_base, stream->first_dts);
535 if (start_time_ == kNoTimestamp() || first_dts < start_time_) 617 if (start_time_ == kNoTimestamp() || first_dts < start_time_)
536 start_time_ = first_dts; 618 start_time_ = first_dts;
537 } 619 }
538 } 620 }
539 621
540 if (!audio_stream && !video_stream) { 622 if (!audio_stream && !video_stream) {
541 status_cb.Run(DEMUXER_ERROR_NO_SUPPORTED_STREAMS); 623 status_cb.Run(DEMUXER_ERROR_NO_SUPPORTED_STREAMS);
542 return; 624 return;
543 } 625 }
544 626
627 if (text_enabled_)
628 AddTextStreams();
629
545 if (format_context->duration != static_cast<int64_t>(AV_NOPTS_VALUE)) { 630 if (format_context->duration != static_cast<int64_t>(AV_NOPTS_VALUE)) {
546 // If there is a duration value in the container use that to find the 631 // If there is a duration value in the container use that to find the
547 // maximum between it and the duration from A/V streams. 632 // maximum between it and the duration from A/V streams.
548 const AVRational av_time_base = {1, AV_TIME_BASE}; 633 const AVRational av_time_base = {1, AV_TIME_BASE};
549 max_duration = 634 max_duration =
550 std::max(max_duration, 635 std::max(max_duration,
551 ConvertFromTimeBase(av_time_base, format_context->duration)); 636 ConvertFromTimeBase(av_time_base, format_context->duration));
552 } else { 637 } else {
553 // The duration is unknown, in which case this is likely a live stream. 638 // The duration is unknown, in which case this is likely a live stream.
554 max_duration = kInfiniteDuration(); 639 max_duration = kInfiniteDuration();
(...skipping 274 matching lines...)
829 } 914 }
830 for (size_t i = 0; i < buffered.size(); ++i) 915 for (size_t i = 0; i < buffered.size(); ++i)
831 host_->AddBufferedTimeRange(buffered.start(i), buffered.end(i)); 916 host_->AddBufferedTimeRange(buffered.start(i), buffered.end(i));
832 } 917 }
833 918
834 void FFmpegDemuxer::OnDataSourceError() { 919 void FFmpegDemuxer::OnDataSourceError() {
835 host_->OnDemuxerError(PIPELINE_ERROR_READ); 920 host_->OnDemuxerError(PIPELINE_ERROR_READ);
836 } 921 }
837 922
838 } // namespace media 923 } // namespace media