Chromium Code Reviews

Side by Side Diff: media/filters/ffmpeg_demuxer.cc

Issue 23702007: Render inband text tracks in the media pipeline (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: incorporate aaron's comments (10/16) Created 7 years, 2 months ago
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "media/filters/ffmpeg_demuxer.h" 5 #include "media/filters/ffmpeg_demuxer.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <string> 8 #include <string>
9 9
10 #include "base/base64.h" 10 #include "base/base64.h"
11 #include "base/bind.h" 11 #include "base/bind.h"
12 #include "base/callback.h" 12 #include "base/callback.h"
13 #include "base/callback_helpers.h" 13 #include "base/callback_helpers.h"
14 #include "base/command_line.h"
15 #include "base/memory/scoped_ptr.h" 14 #include "base/memory/scoped_ptr.h"
16 #include "base/message_loop/message_loop.h" 15 #include "base/message_loop/message_loop.h"
17 #include "base/metrics/sparse_histogram.h" 16 #include "base/metrics/sparse_histogram.h"
18 #include "base/stl_util.h"
19 #include "base/strings/string_util.h" 17 #include "base/strings/string_util.h"
20 #include "base/strings/stringprintf.h" 18 #include "base/strings/stringprintf.h"
21 #include "base/task_runner_util.h" 19 #include "base/task_runner_util.h"
22 #include "base/time/time.h" 20 #include "base/time/time.h"
23 #include "media/base/audio_decoder_config.h" 21 #include "media/base/audio_decoder_config.h"
24 #include "media/base/bind_to_loop.h" 22 #include "media/base/bind_to_loop.h"
25 #include "media/base/decoder_buffer.h" 23 #include "media/base/decoder_buffer.h"
26 #include "media/base/decrypt_config.h" 24 #include "media/base/decrypt_config.h"
27 #include "media/base/limits.h" 25 #include "media/base/limits.h"
28 #include "media/base/media_log.h" 26 #include "media/base/media_log.h"
29 #include "media/base/media_switches.h"
30 #include "media/base/video_decoder_config.h" 27 #include "media/base/video_decoder_config.h"
31 #include "media/ffmpeg/ffmpeg_common.h" 28 #include "media/ffmpeg/ffmpeg_common.h"
32 #include "media/filters/ffmpeg_glue.h" 29 #include "media/filters/ffmpeg_glue.h"
33 #include "media/filters/ffmpeg_h264_to_annex_b_bitstream_converter.h" 30 #include "media/filters/ffmpeg_h264_to_annex_b_bitstream_converter.h"
34 #include "media/webm/webm_crypto_helpers.h" 31 #include "media/webm/webm_crypto_helpers.h"
35 32
36 namespace media { 33 namespace media {
37 34
38 // 35 //
39 // FFmpegDemuxerStream 36 // FFmpegDemuxerStream
(...skipping 17 matching lines...)
57 case AVMEDIA_TYPE_AUDIO: 54 case AVMEDIA_TYPE_AUDIO:
58 type_ = AUDIO; 55 type_ = AUDIO;
59 AVStreamToAudioDecoderConfig(stream, &audio_config_, true); 56 AVStreamToAudioDecoderConfig(stream, &audio_config_, true);
60 is_encrypted = audio_config_.is_encrypted(); 57 is_encrypted = audio_config_.is_encrypted();
61 break; 58 break;
62 case AVMEDIA_TYPE_VIDEO: 59 case AVMEDIA_TYPE_VIDEO:
63 type_ = VIDEO; 60 type_ = VIDEO;
64 AVStreamToVideoDecoderConfig(stream, &video_config_, true); 61 AVStreamToVideoDecoderConfig(stream, &video_config_, true);
65 is_encrypted = video_config_.is_encrypted(); 62 is_encrypted = video_config_.is_encrypted();
66 break; 63 break;
64 case AVMEDIA_TYPE_SUBTITLE:
65 type_ = TEXT;
66 break;
67 default: 67 default:
68 NOTREACHED(); 68 NOTREACHED();
69 break; 69 break;
70 } 70 }
71 71
72 // Calculate the duration. 72 // Calculate the duration.
73 duration_ = ConvertStreamTimestamp(stream->time_base, stream->duration); 73 duration_ = ConvertStreamTimestamp(stream->time_base, stream->duration);
74 74
75 if (stream_->codec->codec_id == AV_CODEC_ID_H264) { 75 if (stream_->codec->codec_id == AV_CODEC_ID_H264) {
76 bitstream_converter_.reset( 76 bitstream_converter_.reset(
(...skipping 26 matching lines...)
103 NOTREACHED() << "Attempted to enqueue packet on a stopped stream"; 103 NOTREACHED() << "Attempted to enqueue packet on a stopped stream";
104 return; 104 return;
105 } 105 }
106 106
107 // Convert the packet if there is a bitstream filter. 107 // Convert the packet if there is a bitstream filter.
108 if (packet->data && bitstream_converter_enabled_ && 108 if (packet->data && bitstream_converter_enabled_ &&
109 !bitstream_converter_->ConvertPacket(packet.get())) { 109 !bitstream_converter_->ConvertPacket(packet.get())) {
110 LOG(ERROR) << "Format conversion failed."; 110 LOG(ERROR) << "Format conversion failed.";
111 } 111 }
112 112
113 // Get side data if any. For now, the only type of side_data is VP8 Alpha. We 113 scoped_refptr<DecoderBuffer> buffer;
114 // keep this generic so that other side_data types in the future can be 114
115 // handled the same way as well. 115 // Get side data if any. For now, the only types of side_data are VP8 Alpha,
116 // and WebVTT id and settings. We keep this generic so that other side_data
117 // types in the future can be handled the same way as well.
116 av_packet_split_side_data(packet.get()); 118 av_packet_split_side_data(packet.get());
117 int side_data_size = 0; 119 if (type() == DemuxerStream::TEXT) {
118 uint8* side_data = av_packet_get_side_data( 120 int id_size = 0;
119 packet.get(), 121 uint8* id_data = av_packet_get_side_data(
120 AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL, 122 packet.get(),
121 &side_data_size); 123 AV_PKT_DATA_WEBVTT_IDENTIFIER,
124 &id_size);
122 125
123 // If a packet is returned by FFmpeg's av_parser_parse2() the packet will 126 int settings_size = 0;
124 // reference inner memory of FFmpeg. As such we should transfer the packet 127 uint8* settings_data = av_packet_get_side_data(
125 // into memory we control. 128 packet.get(),
126 scoped_refptr<DecoderBuffer> buffer; 129 AV_PKT_DATA_WEBVTT_SETTINGS,
127 if (side_data_size > 0) { 130 &settings_size);
131
132 // The DecoderBuffer only supports a single side data item. In the case of
133 // a WebVTT cue, we can have potentially two side data items. In order to
134 // avoid disrupting DecoderBuffer any more than we need to, we copy both
135 // side data items onto a single one, and terminate each with a NUL marker.
136 std::vector<uint8> side_data;
137 side_data.reserve(id_size + 1 + settings_size + 1);
138 side_data.insert(side_data.end(),
139 id_data, id_data + id_size);
140 side_data.push_back(0);
141 side_data.insert(side_data.end(),
142 settings_data, settings_data + settings_size);
143 side_data.push_back(0);
144
128 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size, 145 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size,
129 side_data, side_data_size); 146 side_data.data(), side_data.size());
130 } else { 147 } else {
131 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size); 148 int side_data_size = 0;
149 uint8* side_data = av_packet_get_side_data(
150 packet.get(),
151 AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,
152 &side_data_size);
153
154 // If a packet is returned by FFmpeg's av_parser_parse2() the packet will
155 // reference inner memory of FFmpeg. As such we should transfer the packet
156 // into memory we control.
157 if (side_data_size > 0) {
158 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size,
159 side_data, side_data_size);
160 } else {
161 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size);
162 }
132 } 163 }
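
The comment above notes that DecoderBuffer carries only a single side-data blob, so the WebVTT cue identifier and settings are concatenated into one buffer, each terminated by a NUL byte. A minimal sketch of how a consumer could split that blob back apart, assuming exactly the layout produced above (id bytes, '\0', settings bytes, '\0'); the helper name is illustrative and not part of this patch. <algorithm> and <string> are already included at the top of this file, and uint8 is Chromium's typedef for unsigned char.

// Illustrative only, not part of this patch: splitting the combined side
// data written above back into the WebVTT cue id and settings strings.
static void SplitWebVTTSideData(const uint8* side_data,
                                int side_data_size,
                                std::string* id,
                                std::string* settings) {
  const uint8* end = side_data + side_data_size;
  // The id runs up to the first NUL marker.
  const uint8* first_nul = std::find(side_data, end, 0);
  id->assign(side_data, first_nul);
  // The settings run from just past that NUL to the next one.
  const uint8* settings_begin = (first_nul == end) ? end : first_nul + 1;
  const uint8* second_nul = std::find(settings_begin, end, 0);
  settings->assign(settings_begin, second_nul);
}
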
133 164
134 if ((type() == DemuxerStream::AUDIO && audio_config_.is_encrypted()) || 165 if ((type() == DemuxerStream::AUDIO && audio_config_.is_encrypted()) ||
135 (type() == DemuxerStream::VIDEO && video_config_.is_encrypted())) { 166 (type() == DemuxerStream::VIDEO && video_config_.is_encrypted())) {
136 scoped_ptr<DecryptConfig> config(WebMCreateDecryptConfig( 167 scoped_ptr<DecryptConfig> config(WebMCreateDecryptConfig(
137 packet->data, packet->size, 168 packet->data, packet->size,
138 reinterpret_cast<const uint8*>(encryption_key_id_.data()), 169 reinterpret_cast<const uint8*>(encryption_key_id_.data()),
139 encryption_key_id_.size())); 170 encryption_key_id_.size()));
140 if (!config) 171 if (!config)
141 LOG(ERROR) << "Creation of DecryptConfig failed."; 172 LOG(ERROR) << "Creation of DecryptConfig failed.";
142 buffer->set_decrypt_config(config.Pass()); 173 buffer->set_decrypt_config(config.Pass());
143 } 174 }
144 175
145 buffer->set_timestamp(ConvertStreamTimestamp( 176 buffer->set_timestamp(ConvertStreamTimestamp(
146 stream_->time_base, packet->pts)); 177 stream_->time_base, packet->pts));
147 buffer->set_duration(ConvertStreamTimestamp( 178 buffer->set_duration(ConvertStreamTimestamp(
(...skipping 74 matching lines...)
222 CHECK_EQ(type_, AUDIO); 253 CHECK_EQ(type_, AUDIO);
223 return audio_config_; 254 return audio_config_;
224 } 255 }
225 256
226 VideoDecoderConfig FFmpegDemuxerStream::video_decoder_config() { 257 VideoDecoderConfig FFmpegDemuxerStream::video_decoder_config() {
227 DCHECK(message_loop_->BelongsToCurrentThread()); 258 DCHECK(message_loop_->BelongsToCurrentThread());
228 CHECK_EQ(type_, VIDEO); 259 CHECK_EQ(type_, VIDEO);
229 return video_config_; 260 return video_config_;
230 } 261 }
231 262
263 TextTrackConfig FFmpegDemuxerStream::text_track_config() {
264 DCHECK(message_loop_->BelongsToCurrentThread());
265 CHECK_EQ(type_, TEXT);
266 return TextTrackConfig(GetTextKind(),
267 GetMetadata("title"),
268 GetMetadata("language"));
269 }
270
232 FFmpegDemuxerStream::~FFmpegDemuxerStream() { 271 FFmpegDemuxerStream::~FFmpegDemuxerStream() {
233 DCHECK(!demuxer_); 272 DCHECK(!demuxer_);
234 DCHECK(read_cb_.is_null()); 273 DCHECK(read_cb_.is_null());
235 DCHECK(buffer_queue_.IsEmpty()); 274 DCHECK(buffer_queue_.IsEmpty());
236 } 275 }
237 276
238 base::TimeDelta FFmpegDemuxerStream::GetElapsedTime() const { 277 base::TimeDelta FFmpegDemuxerStream::GetElapsedTime() const {
239 return ConvertStreamTimestamp(stream_->time_base, stream_->cur_dts); 278 return ConvertStreamTimestamp(stream_->time_base, stream_->cur_dts);
240 } 279 }
241 280
(...skipping 23 matching lines...)
265 // TODO(scherkus): Remove early return and reenable time-based capacity 304 // TODO(scherkus): Remove early return and reenable time-based capacity
266 // after our data sources support canceling/concurrent reads, see 305 // after our data sources support canceling/concurrent reads, see
267 // http://crbug.com/165762 for details. 306 // http://crbug.com/165762 for details.
268 return !read_cb_.is_null(); 307 return !read_cb_.is_null();
269 308
270 // Try to have one second's worth of encoded data per stream. 309 // Try to have one second's worth of encoded data per stream.
271 const base::TimeDelta kCapacity = base::TimeDelta::FromSeconds(1); 310 const base::TimeDelta kCapacity = base::TimeDelta::FromSeconds(1);
272 return buffer_queue_.IsEmpty() || buffer_queue_.Duration() < kCapacity; 311 return buffer_queue_.IsEmpty() || buffer_queue_.Duration() < kCapacity;
273 } 312 }
274 313
314 TextKind FFmpegDemuxerStream::GetTextKind() const {
315 DCHECK_EQ(type_, DemuxerStream::TEXT);
316
317 if (stream_->disposition & AV_DISPOSITION_CAPTIONS)
318 return kTextCaptions;
319
320 if (stream_->disposition & AV_DISPOSITION_DESCRIPTIONS)
321 return kTextDescriptions;
322
323 if (stream_->disposition & AV_DISPOSITION_METADATA)
324 return kTextMetadata;
325
326 return kTextSubtitles;
327 }
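
For orientation, the four TextKind values chosen above line up with the kind values defined for HTML <track> elements. A hedged sketch of that correspondence; the helper below is an illustration and is not part of this change.

// Assumption for illustration, not code from this patch: TextKind values
// map onto the HTML <track> "kind" attribute strings.
static const char* TextKindToTrackKind(TextKind kind) {
  switch (kind) {
    case kTextSubtitles:    return "subtitles";
    case kTextCaptions:     return "captions";
    case kTextDescriptions: return "descriptions";
    case kTextMetadata:     return "metadata";
    default:                return "subtitles";  // Fallback for other values.
  }
}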
328
329 std::string FFmpegDemuxerStream::GetMetadata(const char* key) const {
330 const AVDictionaryEntry* entry =
331 av_dict_get(stream_->metadata, key, NULL, 0);
332 return (entry == NULL || entry->value == NULL) ? "" : entry->value;
333 }
334
275 // static 335 // static
276 base::TimeDelta FFmpegDemuxerStream::ConvertStreamTimestamp( 336 base::TimeDelta FFmpegDemuxerStream::ConvertStreamTimestamp(
277 const AVRational& time_base, int64 timestamp) { 337 const AVRational& time_base, int64 timestamp) {
278 if (timestamp == static_cast<int64>(AV_NOPTS_VALUE)) 338 if (timestamp == static_cast<int64>(AV_NOPTS_VALUE))
279 return kNoTimestamp(); 339 return kNoTimestamp();
280 340
281 return ConvertFromTimeBase(time_base, timestamp); 341 return ConvertFromTimeBase(time_base, timestamp);
282 } 342 }
283 343
284 // 344 //
285 // FFmpegDemuxer 345 // FFmpegDemuxer
286 // 346 //
287 FFmpegDemuxer::FFmpegDemuxer( 347 FFmpegDemuxer::FFmpegDemuxer(
288 const scoped_refptr<base::MessageLoopProxy>& message_loop, 348 const scoped_refptr<base::MessageLoopProxy>& message_loop,
289 DataSource* data_source, 349 DataSource* data_source,
290 const NeedKeyCB& need_key_cb, 350 const NeedKeyCB& need_key_cb,
351 bool text_enabled,
291 const scoped_refptr<MediaLog>& media_log) 352 const scoped_refptr<MediaLog>& media_log)
292 : host_(NULL), 353 : host_(NULL),
293 message_loop_(message_loop), 354 message_loop_(message_loop),
294 weak_factory_(this), 355 weak_factory_(this),
295 blocking_thread_("FFmpegDemuxer"), 356 blocking_thread_("FFmpegDemuxer"),
296 pending_read_(false), 357 pending_read_(false),
297 pending_seek_(false), 358 pending_seek_(false),
298 data_source_(data_source), 359 data_source_(data_source),
299 media_log_(media_log), 360 media_log_(media_log),
300 bitrate_(0), 361 bitrate_(0),
301 start_time_(kNoTimestamp()), 362 start_time_(kNoTimestamp()),
302 audio_disabled_(false), 363 audio_disabled_(false),
364 text_enabled_(text_enabled),
303 duration_known_(false), 365 duration_known_(false),
304 url_protocol_(data_source, BindToLoop(message_loop_, base::Bind( 366 url_protocol_(data_source, BindToLoop(message_loop_, base::Bind(
305 &FFmpegDemuxer::OnDataSourceError, base::Unretained(this)))), 367 &FFmpegDemuxer::OnDataSourceError, base::Unretained(this)))),
306 need_key_cb_(need_key_cb) { 368 need_key_cb_(need_key_cb) {
307 DCHECK(message_loop_.get()); 369 DCHECK(message_loop_.get());
308 DCHECK(data_source_); 370 DCHECK(data_source_);
309 } 371 }
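
With the base/command_line.h and media/base/media_switches.h includes removed, the demuxer no longer consults a switch itself; the caller now decides via the new text_enabled argument. A hypothetical call site, purely to show where that argument slots in; the variable names are placeholders for whatever the embedding pipeline already has in hand.

// Hypothetical call site, not part of this patch.
scoped_ptr<FFmpegDemuxer> demuxer(new FFmpegDemuxer(
    media_message_loop,   // scoped_refptr<base::MessageLoopProxy>
    data_source,          // DataSource*
    need_key_cb,          // NeedKeyCB
    true,                 // text_enabled: expose subtitle streams
    media_log));          // scoped_refptr<MediaLog>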
310 372
311 FFmpegDemuxer::~FFmpegDemuxer() {} 373 FFmpegDemuxer::~FFmpegDemuxer() {}
312 374
(...skipping 84 matching lines...)
397 } 459 }
398 } 460 }
399 return NULL; 461 return NULL;
400 } 462 }
401 463
402 base::TimeDelta FFmpegDemuxer::GetStartTime() const { 464 base::TimeDelta FFmpegDemuxer::GetStartTime() const {
403 DCHECK(message_loop_->BelongsToCurrentThread()); 465 DCHECK(message_loop_->BelongsToCurrentThread());
404 return start_time_; 466 return start_time_;
405 } 467 }
406 468
469 void FFmpegDemuxer::AddTextStreams() {
470 DCHECK(message_loop_->BelongsToCurrentThread());
471
472 for (StreamVector::size_type idx = 0; idx < streams_.size(); ++idx) {
473 FFmpegDemuxerStream* stream = streams_[idx];
474 if (stream == NULL || stream->type() != DemuxerStream::TEXT)
475 continue;
476
477 TextKind kind = stream->GetTextKind();
478 std::string title = stream->GetMetadata("title");
479 std::string language = stream->GetMetadata("language");
480
481 host_->AddTextStream(stream, kind, title, language);
482 }
483 }
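
AddTextStreams() hands each subtitle stream that survived initialization to the host through AddTextStream(stream, kind, title, language). Purely as orientation, a rough sketch of what a host-side override could look like, under the assumption that the host interface mirrors that call exactly; the class and member names are placeholders, not Chromium code.

// Rough sketch only; DemuxerHost details beyond the call above are assumed.
void MyDemuxerHost::AddTextStream(DemuxerStream* text_stream,
                                  TextKind kind,
                                  const std::string& title,
                                  const std::string& language) {
  // Remember the inband track; a real host would hand it to whichever
  // component renders text cues.
  text_streams_.push_back(text_stream);
  DVLOG(1) << "inband text track: kind=" << kind << " title=" << title
           << " language=" << language;
}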
484
407 // Helper for calculating the bitrate of the media based on information stored 485 // Helper for calculating the bitrate of the media based on information stored
408 // in |format_context| or failing that the size and duration of the media. 486 // in |format_context| or failing that the size and duration of the media.
409 // 487 //
410 // Returns 0 if a bitrate could not be determined. 488 // Returns 0 if a bitrate could not be determined.
411 static int CalculateBitrate( 489 static int CalculateBitrate(
412 AVFormatContext* format_context, 490 AVFormatContext* format_context,
413 const base::TimeDelta& duration, 491 const base::TimeDelta& duration,
414 int64 filesize_in_bytes) { 492 int64 filesize_in_bytes) {
415 // If there is a bitrate set on the container, use it. 493 // If there is a bitrate set on the container, use it.
416 if (format_context->bit_rate > 0) 494 if (format_context->bit_rate > 0)
(...skipping 98 matching lines...)
515 // Log the codec detected, whether it is supported or not. 593 // Log the codec detected, whether it is supported or not.
516 UMA_HISTOGRAM_SPARSE_SLOWLY("Media.DetectedVideoCodec", 594 UMA_HISTOGRAM_SPARSE_SLOWLY("Media.DetectedVideoCodec",
517 codec_context->codec_id); 595 codec_context->codec_id);
518 // Ensure the codec is supported. IsValidConfig() also checks that the 596 // Ensure the codec is supported. IsValidConfig() also checks that the
519 // frame size and visible size are valid. 597 // frame size and visible size are valid.
520 AVStreamToVideoDecoderConfig(stream, &video_config, false); 598 AVStreamToVideoDecoderConfig(stream, &video_config, false);
521 599
522 if (!video_config.IsValidConfig()) 600 if (!video_config.IsValidConfig())
523 continue; 601 continue;
524 video_stream = stream; 602 video_stream = stream;
603 } else if (codec_type == AVMEDIA_TYPE_SUBTITLE) {
604 if (codec_context->codec_id != AV_CODEC_ID_WEBVTT || !text_enabled_) {
605 continue;
606 }
525 } else { 607 } else {
526 continue; 608 continue;
527 } 609 }
528 610
529 streams_[i] = new FFmpegDemuxerStream(this, stream); 611 streams_[i] = new FFmpegDemuxerStream(this, stream);
530 max_duration = std::max(max_duration, streams_[i]->duration()); 612 max_duration = std::max(max_duration, streams_[i]->duration());
531 613
532 if (stream->first_dts != static_cast<int64_t>(AV_NOPTS_VALUE)) { 614 if (stream->first_dts != static_cast<int64_t>(AV_NOPTS_VALUE)) {
533 const base::TimeDelta first_dts = ConvertFromTimeBase( 615 const base::TimeDelta first_dts = ConvertFromTimeBase(
534 stream->time_base, stream->first_dts); 616 stream->time_base, stream->first_dts);
535 if (start_time_ == kNoTimestamp() || first_dts < start_time_) 617 if (start_time_ == kNoTimestamp() || first_dts < start_time_)
536 start_time_ = first_dts; 618 start_time_ = first_dts;
537 } 619 }
538 } 620 }
539 621
540 if (!audio_stream && !video_stream) { 622 if (!audio_stream && !video_stream) {
541 status_cb.Run(DEMUXER_ERROR_NO_SUPPORTED_STREAMS); 623 status_cb.Run(DEMUXER_ERROR_NO_SUPPORTED_STREAMS);
542 return; 624 return;
543 } 625 }
544 626
627 if (text_enabled_)
628 AddTextStreams();
629
545 if (format_context->duration != static_cast<int64_t>(AV_NOPTS_VALUE)) { 630 if (format_context->duration != static_cast<int64_t>(AV_NOPTS_VALUE)) {
546 // If there is a duration value in the container use that to find the 631 // If there is a duration value in the container use that to find the
547 // maximum between it and the duration from A/V streams. 632 // maximum between it and the duration from A/V streams.
548 const AVRational av_time_base = {1, AV_TIME_BASE}; 633 const AVRational av_time_base = {1, AV_TIME_BASE};
549 max_duration = 634 max_duration =
550 std::max(max_duration, 635 std::max(max_duration,
551 ConvertFromTimeBase(av_time_base, format_context->duration)); 636 ConvertFromTimeBase(av_time_base, format_context->duration));
552 } else { 637 } else {
553 // The duration is unknown, in which case this is likely a live stream. 638 // The duration is unknown, in which case this is likely a live stream.
554 max_duration = kInfiniteDuration(); 639 max_duration = kInfiniteDuration();
(...skipping 274 matching lines...)
829 } 914 }
830 for (size_t i = 0; i < buffered.size(); ++i) 915 for (size_t i = 0; i < buffered.size(); ++i)
831 host_->AddBufferedTimeRange(buffered.start(i), buffered.end(i)); 916 host_->AddBufferedTimeRange(buffered.start(i), buffered.end(i));
832 } 917 }
833 918
834 void FFmpegDemuxer::OnDataSourceError() { 919 void FFmpegDemuxer::OnDataSourceError() {
835 host_->OnDemuxerError(PIPELINE_ERROR_READ); 920 host_->OnDemuxerError(PIPELINE_ERROR_READ);
836 } 921 }
837 922
838 } // namespace media 923 } // namespace media