media/mp2t/es_parser_h264.cc - Issue 23566013: Mpeg2 TS stream parser for media source.

Unified Diff: media/mp2t/es_parser_h264.cc

Issue 23566013: Mpeg2 TS stream parser for media source. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Clang warning fix Created 7 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: media/mp2t/es_parser_h264.cc

diff --git a/media/mp2t/es_parser_h264.cc b/media/mp2t/es_parser_h264.cc

new file mode 100644

index 0000000000000000000000000000000000000000..2bfe2c3fe624dc9df29aaeab566155946a0379d1

--- /dev/null

+++ b/media/mp2t/es_parser_h264.cc

@@ -0,0 +1,507 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "media/mp2t/es_parser_h264.h"

+#include "base/basictypes.h"

+#include "base/logging.h"

+#include "media/base/bit_reader.h"

+#include "media/base/buffers.h"

+#include "media/base/stream_parser_buffer.h"

+#include "media/base/video_frame.h"

+#include "media/mp2t/mp2t_common.h"

+#include "ui/gfx/rect.h"

+#include "ui/gfx/size.h"

// Value of aspect_ratio_idc for which the SPS parser in this file reads
// sar_width and sar_height explicitly from the bitstream instead of using
// the tables below.
static const int kExtendedSar = 255;

// ISO 14496 part 10
// VUI parameters: Table E-1 "Meaning of sample aspect ratio indicator"
// Both tables are indexed by aspect_ratio_idc (0..13): entry |i| of the width
// table pairs with entry |i| of the height table.
static const int kTableSarWidth[14] = {
  1, 1, 12, 10, 16, 40, 24,
  20, 32, 80, 18, 15, 64, 160
};

static const int kTableSarHeight[14] = {
  1, 1, 11, 11, 11, 33, 11,
  11, 11, 33, 11, 11, 33, 99
};

// Remove the start code emulation prevention bytes (the 0x03 of every
// 0x000003 sequence) and return the size of the converted buffer.
//
// |buf|      - escaped NAL payload to convert.
// |size|     - number of bytes available in |buf|; a value <= 0 yields 0.
// |buf_rbsp| - destination buffer. Its size must be at least |size| to
//              accommodate the worst case (no byte removed).
//
// Note: the buffers are spelled with uint8_t, the type Chromium's uint8
// typedef aliases, so callers passing uint8 pointers are unaffected.
static int ConvertToRbsp(const uint8_t* buf, int size, uint8_t* buf_rbsp) {
  int rbsp_size = 0;
  int zero_count = 0;  // Number of consecutive 0x00 bytes just copied.
  for (int k = 0; k < size; k++) {
    // A 0x03 following at least two zero bytes is an emulation prevention
    // byte: drop it and restart the zero run.
    if (buf[k] == 0x3 && zero_count >= 2) {
      zero_count = 0;
      continue;
    }
    if (buf[k] == 0)
      zero_count++;
    else
      zero_count = 0;
    buf_rbsp[rbsp_size++] = buf[k];
  }
  return rbsp_size;
}

+namespace media {

+namespace mp2t {

// ISO 14496 - Part 10: Table 7-1 "NAL unit type codes"
// Only the NAL unit types this parser gives a dedicated treatment to are
// listed; each enumerator carries the numeric nal_unit_type code.
enum NalUnitType {
  kNalUnitTypeNonIdrSlice = 1,  // Coded slice of a non-IDR picture.
  kNalUnitTypeIdrSlice = 5,     // Coded slice of an IDR picture.
  kNalUnitTypeSPS = 7,          // Sequence parameter set.
  kNalUnitTypePPS = 8,          // Picture parameter set.
  kNalUnitTypeAUD = 9,          // Access unit delimiter.
};

+class BitReaderH264 : public BitReader {

+ public:

+ BitReaderH264(const uint8* data, off_t size)

+ : BitReader(data, size) { }

+ // Read an unsigned exp-golomb value.

+ // Return true if successful.

+ bool ReadBitsExpGolomb(uint32* exp_golomb_value);

+};

+bool BitReaderH264::ReadBitsExpGolomb(uint32* exp_golomb_value) {

+ // Get the number of leading zeros.

+ int zero_count = 0;

+ while (true) {

+ int one_bit;

+ RCHECK(ReadBits(1, &one_bit));

+ if (one_bit != 0)

+ break;

+ zero_count++;

+ }

+ // If zero_count is greater than 31, the calculated value will overflow.

+ if (zero_count > 31) {

+ SkipBits(zero_count);

+ return false;

+ }

+ // Read the actual value.

+ uint32 base = (1 << zero_count) - 1;

+ uint32 offset;

+ RCHECK(ReadBits(zero_count, &offset));

+ *exp_golomb_value = base + offset;

+ return true;

+EsParserH264::EsParserH264(

+ const NewVideoConfigCB& new_video_config_cb,

+ const EmitBufferCB& emit_buffer_cb)

+ : new_video_config_cb_(new_video_config_cb),

+ emit_buffer_cb_(emit_buffer_cb),

+ es_pos_(0),

+ current_nal_pos_(-1),

+ current_access_unit_pos_(-1),

+ is_key_frame_(false) {

+EsParserH264::~EsParserH264() {

+bool EsParserH264::Parse(const uint8* buf, int size,

+ base::TimeDelta pts,

+ base::TimeDelta dts) {

+ // Note: Parse is invoked each time a PES packet has been reassembled.

+ // Unfortunately, a PES packet does not necessarily map

+ // to an h264 access unit, although the HLS recommendation is to use one PES

+ // for each access unit (but this is just a recommendation and some streams

+ // do not comply with this recommendation).

+ // Link position |raw_es_size| in the ES stream with a timing descriptor.

+ // HLS recommendation: "In AVC video, you should have both a DTS and a

+ // PTS in each PES header".

+ if (dts == kNoTimestamp() && pts == kNoTimestamp()) {

+ DVLOG(1) << "A timestamp must be provided for each reassembled PES";

+ return false;

+ }

+ TimingDesc timing_desc;

+ timing_desc.pts = pts;

+ timing_desc.dts = (dts != kNoTimestamp()) ? dts : pts;

+ int raw_es_size;

+ const uint8* raw_es;

+ es_byte_queue_.Peek(&raw_es, &raw_es_size);

+ timing_desc_list_.push_back(

+ std::pair<int, TimingDesc>(raw_es_size, timing_desc));

+ // Add the incoming bytes to the ES queue.

+ es_byte_queue_.Push(buf, size);

+ // Add NALs from the incoming buffer.

+ if (!ParseInternal())

+ return false;

+ // Discard emitted frames

+ // or every byte that was parsed so far if there is no current frame.

+ int skip_count =

+ (current_access_unit_pos_ >= 0) ? current_access_unit_pos_ : es_pos_;

+ DiscardEs(skip_count);

+ return true;

+void EsParserH264::Flush() {

+ if (current_access_unit_pos_ < 0)

+ return;

+ // Force emitting the last access unit.

+ int next_aud_pos;

+ const uint8* raw_es;

+ es_byte_queue_.Peek(&raw_es, &next_aud_pos);

+ EmitFrameIfNeeded(next_aud_pos);

+ current_nal_pos_ = -1;

+ StartFrame(-1);

+ // Discard the emitted frame.

+ DiscardEs(next_aud_pos);

+void EsParserH264::Reset() {

+ DVLOG(1) << "EsParserH264::Reset";

+ es_byte_queue_.Reset();

+ timing_desc_list_.clear();

+ es_pos_ = 0;

+ current_nal_pos_ = -1;

+ StartFrame(-1);

+ last_video_decoder_config_ = VideoDecoderConfig();

+bool EsParserH264::ParseInternal() {

+ int raw_es_size;

+ const uint8* raw_es;

+ es_byte_queue_.Peek(&raw_es, &raw_es_size);

+ DCHECK_GE(es_pos_, 0);

+ DCHECK_LT(es_pos_, raw_es_size);

+ // Resume h264 es parsing where it was left.

+ for ( ; es_pos_ < raw_es_size - 4; es_pos_++) {

+ // Make sure the syncword is either 00 00 00 01 or 00 00 01

+ if (raw_es[es_pos_ + 0] != 0 || raw_es[es_pos_ + 1] != 0)

+ continue;

+ int syncword_length = 0;

+ if (raw_es[es_pos_ + 2] == 0 && raw_es[es_pos_ + 3] == 1)

+ syncword_length = 4;

+ else if (raw_es[es_pos_ + 2] == 1)

+ syncword_length = 3;

+ else

+ continue;

+ // Parse the current NAL (and the new NAL then becomes the current one).

+ if (current_nal_pos_ >= 0) {

+ int nal_size = es_pos_ - current_nal_pos_;

+ DCHECK_GT(nal_size, 0);

+ RCHECK(NalParser(&raw_es[current_nal_pos_], nal_size));

+ }

+ current_nal_pos_ = es_pos_ + syncword_length;

+ // Retrieve the NAL type.

+ int nal_header = raw_es[current_nal_pos_];

+ int forbidden_zero_bit = (nal_header >> 7) & 0x1;

+ RCHECK(forbidden_zero_bit == 0);

+ NalUnitType nal_unit_type = static_cast<NalUnitType>(nal_header & 0x1f);

+ DVLOG(LOG_LEVEL_ES) << "nal: offset=" << es_pos_

+ << " type=" << nal_unit_type;

+ // Emit a frame if needed.

+ if (nal_unit_type == kNalUnitTypeAUD)

+ EmitFrameIfNeeded(es_pos_);

+ // Skip the syncword.

+ es_pos_ += syncword_length;

+ }

+ return true;

+void EsParserH264::EmitFrameIfNeeded(int next_aud_pos) {

+ // There is no current frame: start a new frame.

+ if (current_access_unit_pos_ < 0) {

+ StartFrame(next_aud_pos);

+ return;

+ }

+ // Get the access unit timing info.

+ TimingDesc current_timing_desc;

+ while (!timing_desc_list_.empty() &&

+ timing_desc_list_.front().first <= current_access_unit_pos_) {

+ current_timing_desc = timing_desc_list_.front().second;

+ timing_desc_list_.pop_front();

+ }

+ // Emit a frame.

+ int raw_es_size;

+ const uint8* raw_es;

+ es_byte_queue_.Peek(&raw_es, &raw_es_size);

+ int access_unit_size = next_aud_pos - current_access_unit_pos_;

+ scoped_refptr<StreamParserBuffer> stream_parser_buffer =

+ StreamParserBuffer::CopyFrom(

+ &raw_es[current_access_unit_pos_],

+ access_unit_size,

+ is_key_frame_);

+ stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts);

+ stream_parser_buffer->set_timestamp(current_timing_desc.pts);

+ emit_buffer_cb_.Run(stream_parser_buffer);

+ // Set the current frame position to the next AUD position.

+ StartFrame(next_aud_pos);

+void EsParserH264::StartFrame(int aud_pos) {

+ // Two cases:

+ // - if aud_pos < 0, clear the current frame and set |is_key_frame| to a

+ // default value (false).

+ // - if aud_pos >= 0, start a new frame and set |is_key_frame| to true

+ // |is_key_frame_| will be updated while parsing the NALs of that frame.

+ // If any NAL is a non IDR NAL, it will be set to false.

+ current_access_unit_pos_ = aud_pos;

+ is_key_frame_ = (aud_pos >= 0);

+void EsParserH264::DiscardEs(int nbytes) {

+ DCHECK_GE(nbytes, 0);

+ if (nbytes == 0)

+ return;

+ // Update the position of

+ // - the parser,

+ // - the current NAL,

+ // - the current access unit.

+ es_pos_ -= nbytes;

+ if (es_pos_ < 0)

+ es_pos_ = 0;

+ if (current_nal_pos_ >= 0) {

+ DCHECK_GE(current_nal_pos_, nbytes);

+ current_nal_pos_ -= nbytes;

+ }

+ if (current_access_unit_pos_ >= 0) {

+ DCHECK_GE(current_access_unit_pos_, nbytes);

+ current_access_unit_pos_ -= nbytes;

+ }

+ // Update the timing information accordingly.

+ std::list<std::pair<int, TimingDesc> >::iterator timing_it

+ = timing_desc_list_.begin();

+ for (; timing_it != timing_desc_list_.end(); ++timing_it)

+ timing_it->first -= nbytes;

+ // Discard |nbytes| of ES.

+ es_byte_queue_.Pop(nbytes);

+bool EsParserH264::NalParser(const uint8* buf, int size) {

+ // Get the NAL header.

+ if (size < 1) {

+ DVLOG(1) << "NalParser: incomplete NAL";

+ return false;

+ }

+ int nal_header = buf[0];

+ buf += 1;

+ size -= 1;

+ int forbidden_zero_bit = (nal_header >> 7) & 0x1;

+ if (forbidden_zero_bit != 0)

+ return false;

+ int nal_ref_idc = (nal_header >> 5) & 0x3;

+ int nal_unit_type = nal_header & 0x1f;

+ // Process the NAL content.

+ switch (nal_unit_type) {

+ case kNalUnitTypeSPS:

+ DVLOG(LOG_LEVEL_ES) << "NAL: SPS";

+ // |nal_ref_idc| should not be 0 for a SPS.

+ if (nal_ref_idc == 0)

+ return false;

+ return ProcessSPS(buf, size);

+ case kNalUnitTypeIdrSlice:

+ DVLOG(LOG_LEVEL_ES) << "NAL: IDR slice";

+ return true;

+ case kNalUnitTypeNonIdrSlice:

+ DVLOG(LOG_LEVEL_ES) << "NAL: Non IDR slice";

+ is_key_frame_ = false;

+ return true;

+ case kNalUnitTypePPS:

+ DVLOG(LOG_LEVEL_ES) << "NAL: PPS";

+ return true;

+ case kNalUnitTypeAUD:

+ DVLOG(LOG_LEVEL_ES) << "NAL: AUD";

+ return true;

+ default:

+ DVLOG(LOG_LEVEL_ES) << "NAL: " << nal_unit_type;

+ return true;

+ }

+ NOTREACHED();

+ return false;

+bool EsParserH264::ProcessSPS(const uint8* buf, int size) {

+ if (size <= 0)

+ return false;

+ // Removes start code emulation prevention.

+ // TODO(damienv): refactoring in media/base

+ // so as to have a unique H264 bit reader in Chrome.

+ scoped_ptr<uint8[]> buf_rbsp(new uint8[size]);

+ int rbsp_size = ConvertToRbsp(buf, size, buf_rbsp.get());

+ BitReaderH264 bit_reader(buf_rbsp.get(), rbsp_size);

+ int profile_idc;

+ int constraint_setX_flag;

+ int level_idc;

+ uint32 seq_parameter_set_id;

+ uint32 log2_max_frame_num_minus4;

+ uint32 pic_order_cnt_type;

+ RCHECK(bit_reader.ReadBits(8, &profile_idc));

+ RCHECK(bit_reader.ReadBits(8, &constraint_setX_flag));

+ RCHECK(bit_reader.ReadBits(8, &level_idc));

+ RCHECK(bit_reader.ReadBitsExpGolomb(&seq_parameter_set_id));

+ RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_frame_num_minus4));

+ RCHECK(bit_reader.ReadBitsExpGolomb(&pic_order_cnt_type));

+ // |pic_order_cnt_type| shall be in the range of 0 to 2.

+ RCHECK(pic_order_cnt_type <= 2);

+ if (pic_order_cnt_type == 0) {

+ uint32 log2_max_pic_order_cnt_lsb_minus4;

+ RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_pic_order_cnt_lsb_minus4));

+ } else if (pic_order_cnt_type == 1) {

+ // Note: |offset_for_non_ref_pic| and |offset_for_top_to_bottom_field|

+ // corresponds to their codenum not to their actual value.

+ bool delta_pic_order_always_zero_flag;

+ uint32 offset_for_non_ref_pic;

+ uint32 offset_for_top_to_bottom_field;

+ uint32 num_ref_frames_in_pic_order_cnt_cycle;

+ RCHECK(bit_reader.ReadBits(1, &delta_pic_order_always_zero_flag));

+ RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_non_ref_pic));

+ RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_top_to_bottom_field));

+ RCHECK(

+ bit_reader.ReadBitsExpGolomb(&num_ref_frames_in_pic_order_cnt_cycle));

+ for (uint32 i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) {

+ uint32 offset_for_ref_frame_codenum;

+ RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_ref_frame_codenum));

+ }

+ uint32 num_ref_frames;

+ int gaps_in_frame_num_value_allowed_flag;

+ uint32 pic_width_in_mbs_minus1;

+ uint32 pic_height_in_map_units_minus1;

+ RCHECK(bit_reader.ReadBitsExpGolomb(&num_ref_frames));

+ RCHECK(bit_reader.ReadBits(1, &gaps_in_frame_num_value_allowed_flag));

+ RCHECK(bit_reader.ReadBitsExpGolomb(&pic_width_in_mbs_minus1));

+ RCHECK(bit_reader.ReadBitsExpGolomb(&pic_height_in_map_units_minus1));

+ int frame_mbs_only_flag;

+ RCHECK(bit_reader.ReadBits(1, &frame_mbs_only_flag));

+ if (!frame_mbs_only_flag) {

+ int mb_adaptive_frame_field_flag;

+ RCHECK(bit_reader.ReadBits(1, &mb_adaptive_frame_field_flag));

+ }

+ int direct_8x8_inference_flag;

+ RCHECK(bit_reader.ReadBits(1, &direct_8x8_inference_flag));

+ bool frame_cropping_flag;

+ uint32 frame_crop_left_offset = 0;

+ uint32 frame_crop_right_offset = 0;

+ uint32 frame_crop_top_offset = 0;

+ uint32 frame_crop_bottom_offset = 0;

+ RCHECK(bit_reader.ReadBits(1, &frame_cropping_flag));

+ if (frame_cropping_flag) {

+ RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_left_offset));

+ RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_right_offset));

+ RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_top_offset));

+ RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_bottom_offset));

+ }

+ bool vui_parameters_present_flag;

+ RCHECK(bit_reader.ReadBits(1, &vui_parameters_present_flag));

+ int sar_width = 1;

+ int sar_height = 1;

+ if (vui_parameters_present_flag) {

+ // Read only the aspect ratio information from the VUI section.

+ // TODO(damienv): check whether other VUI info are useful.

+ bool aspect_ratio_info_present_flag = false;

+ RCHECK(bit_reader.ReadBits(1, &aspect_ratio_info_present_flag));

+ if (aspect_ratio_info_present_flag) {

+ int aspect_ratio_idc;

+ RCHECK(bit_reader.ReadBits(8, &aspect_ratio_idc));

+ if (aspect_ratio_idc == kExtendedSar) {

+ RCHECK(bit_reader.ReadBits(16, &sar_width));

+ RCHECK(bit_reader.ReadBits(16, &sar_height));

+ } else if (aspect_ratio_idc < 14) {

+ sar_width = kTableSarWidth[aspect_ratio_idc];

+ sar_height = kTableSarHeight[aspect_ratio_idc];

+ }

+ if (sar_width != sar_height) {

+ // TODO(damienv): Support non square pixels.

+ DVLOG(1)

+ << "Non square pixel not supported yet:"

+ << " sar_width=" << sar_width

+ << " sar_height=" << sar_height;

+ return false;

+ }

+ // TODO(damienv): a MAP unit can be either 16 or 32 pixels.

+ // although it's 16 pixels for progressive non MBAFF frames.

+ gfx::Size coded_size((pic_width_in_mbs_minus1 + 1) * 16,

+ (pic_height_in_map_units_minus1 + 1) * 16);

+ gfx::Rect visible_rect(

+ frame_crop_left_offset,

+ frame_crop_top_offset,

+ (coded_size.width() - frame_crop_right_offset) - frame_crop_left_offset,

+ (coded_size.height() - frame_crop_bottom_offset) - frame_crop_top_offset);

+ // TODO(damienv): calculate the natural size based

+ // on the possible aspect ratio coded in the VUI parameters.

+ gfx::Size natural_size(visible_rect.width(),

+ visible_rect.height());

+ // TODO(damienv):

+ // Assuming the SPS is used right away by the PPS

+ // and the slice headers is a strong assumption.

+ // In theory, we should process the SPS and PPS

+ // and only when one of the slice header is switching

+ // the PPS id, the video decoder config should be changed.

+ VideoDecoderConfig video_decoder_config(

+ kCodecH264,

+ VIDEO_CODEC_PROFILE_UNKNOWN, // TODO(damienv)

+ VideoFrame::YV12,

+ coded_size,

+ visible_rect,

+ natural_size,

+ NULL, 0,

+ false);

+ if (!video_decoder_config.Matches(last_video_decoder_config_)) {

+ DVLOG(1) << "Profile IDC: " << profile_idc;

+ DVLOG(1) << "Level IDC: " << level_idc;

+ DVLOG(1) << "Pic width: " << (pic_width_in_mbs_minus1 + 1) * 16;

+ DVLOG(1) << "Pic height: " << (pic_height_in_map_units_minus1 + 1) * 16;

+ DVLOG(1) << "log2_max_frame_num_minus4: " << log2_max_frame_num_minus4;

+ last_video_decoder_config_ = video_decoder_config;

+ new_video_config_cb_.Run(video_decoder_config);

+ }

+ return true;

+} // namespace mp2t

+} // namespace media

« no previous file with comments | « media/mp2t/es_parser_h264.h ('k') | media/mp2t/mp2t_common.h » ('j') | no next file with comments »