Index: media/mp2t/es_parser_h264.cc |
diff --git a/media/mp2t/es_parser_h264.cc b/media/mp2t/es_parser_h264.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..2bfe2c3fe624dc9df29aaeab566155946a0379d1 |
--- /dev/null |
+++ b/media/mp2t/es_parser_h264.cc |
@@ -0,0 +1,507 @@ |
+// Copyright (c) 2013 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "media/mp2t/es_parser_h264.h" |
+ |
+#include "base/basictypes.h" |
+#include "base/logging.h" |
+#include "media/base/bit_reader.h" |
+#include "media/base/buffers.h" |
+#include "media/base/stream_parser_buffer.h" |
+#include "media/base/video_frame.h" |
+#include "media/mp2t/mp2t_common.h" |
+#include "ui/gfx/rect.h" |
+#include "ui/gfx/size.h" |
+ |
+static const int kExtendedSar = 255; |
+ |
+// ISO 14496 part 10 |
+// VUI parameters: Table E-1 "Meaning of sample aspect ratio indicator" |
+static const int kTableSarWidth[14] = { |
+ 1, 1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160 |
+}; |
+ |
+static const int kTableSarHeight[14] = { |
+ 1, 1, 11, 11, 11, 33, 11, 11, 11, 33, 11, 11, 33, 99 |
+}; |
+ |
+// Remove the start code emulation prevention ( 0x000003 ) |
+// and return the size of the converted buffer. |
+// Note: Size of |buf_rbsp| should be at least |size| to accomodate |
+// the worst case. |
+static int ConvertToRbsp(const uint8* buf, int size, uint8* buf_rbsp) { |
+ int rbsp_size = 0; |
+ int zero_count = 0; |
+ for (int k = 0; k < size; k++) { |
+ if (buf[k] == 0x3 && zero_count >= 2) { |
+ zero_count = 0; |
+ continue; |
+ } |
+ if (buf[k] == 0) |
+ zero_count++; |
+ else |
+ zero_count = 0; |
+ buf_rbsp[rbsp_size++] = buf[k]; |
+ } |
+ return rbsp_size; |
+} |
+ |
+namespace media { |
+namespace mp2t { |
+ |
+// ISO 14496 - Part 10: Table 7-1 "NAL unit type codes" |
+enum NalUnitType { |
+ kNalUnitTypeNonIdrSlice = 1, |
+ kNalUnitTypeIdrSlice = 5, |
+ kNalUnitTypeSPS = 7, |
+ kNalUnitTypePPS = 8, |
+ kNalUnitTypeAUD = 9, |
+}; |
+ |
+class BitReaderH264 : public BitReader { |
+ public: |
+ BitReaderH264(const uint8* data, off_t size) |
+ : BitReader(data, size) { } |
+ |
+ // Read an unsigned exp-golomb value. |
+ // Return true if successful. |
+ bool ReadBitsExpGolomb(uint32* exp_golomb_value); |
+}; |
+ |
+bool BitReaderH264::ReadBitsExpGolomb(uint32* exp_golomb_value) { |
+ // Get the number of leading zeros. |
+ int zero_count = 0; |
+ while (true) { |
+ int one_bit; |
+ RCHECK(ReadBits(1, &one_bit)); |
+ if (one_bit != 0) |
+ break; |
+ zero_count++; |
+ } |
+ |
+ // If zero_count is greater than 31, the calculated value will overflow. |
+ if (zero_count > 31) { |
+ SkipBits(zero_count); |
+ return false; |
+ } |
+ |
+ // Read the actual value. |
+ uint32 base = (1 << zero_count) - 1; |
+ uint32 offset; |
+ RCHECK(ReadBits(zero_count, &offset)); |
+ *exp_golomb_value = base + offset; |
+ |
+ return true; |
+} |
+ |
+EsParserH264::EsParserH264( |
+ const NewVideoConfigCB& new_video_config_cb, |
+ const EmitBufferCB& emit_buffer_cb) |
+ : new_video_config_cb_(new_video_config_cb), |
+ emit_buffer_cb_(emit_buffer_cb), |
+ es_pos_(0), |
+ current_nal_pos_(-1), |
+ current_access_unit_pos_(-1), |
+ is_key_frame_(false) { |
+} |
+ |
+EsParserH264::~EsParserH264() { |
+} |
+ |
+bool EsParserH264::Parse(const uint8* buf, int size, |
+ base::TimeDelta pts, |
+ base::TimeDelta dts) { |
+ // Note: Parse is invoked each time a PES packet has been reassembled. |
+ // Unfortunately, a PES packet does not necessarily map |
+ // to an h264 access unit, although the HLS recommendation is to use one PES |
+ // for each access unit (but this is just a recommendation and some streams |
+ // do not comply with this recommendation). |
+ |
+ // Link position |raw_es_size| in the ES stream with a timing descriptor. |
+ // HLS recommendation: "In AVC video, you should have both a DTS and a |
+ // PTS in each PES header". |
+ if (dts == kNoTimestamp() && pts == kNoTimestamp()) { |
+ DVLOG(1) << "A timestamp must be provided for each reassembled PES"; |
+ return false; |
+ } |
+ TimingDesc timing_desc; |
+ timing_desc.pts = pts; |
+ timing_desc.dts = (dts != kNoTimestamp()) ? dts : pts; |
+ |
+ int raw_es_size; |
+ const uint8* raw_es; |
+ es_byte_queue_.Peek(&raw_es, &raw_es_size); |
+ timing_desc_list_.push_back( |
+ std::pair<int, TimingDesc>(raw_es_size, timing_desc)); |
+ |
+ // Add the incoming bytes to the ES queue. |
+ es_byte_queue_.Push(buf, size); |
+ |
+ // Add NALs from the incoming buffer. |
+ if (!ParseInternal()) |
+ return false; |
+ |
+ // Discard emitted frames |
+ // or every byte that was parsed so far if there is no current frame. |
+ int skip_count = |
+ (current_access_unit_pos_ >= 0) ? current_access_unit_pos_ : es_pos_; |
+ DiscardEs(skip_count); |
+ |
+ return true; |
+} |
+ |
+void EsParserH264::Flush() { |
+ if (current_access_unit_pos_ < 0) |
+ return; |
+ |
+ // Force emitting the last access unit. |
+ int next_aud_pos; |
+ const uint8* raw_es; |
+ es_byte_queue_.Peek(&raw_es, &next_aud_pos); |
+ EmitFrameIfNeeded(next_aud_pos); |
+ current_nal_pos_ = -1; |
+ StartFrame(-1); |
+ |
+ // Discard the emitted frame. |
+ DiscardEs(next_aud_pos); |
+} |
+ |
+void EsParserH264::Reset() { |
+ DVLOG(1) << "EsParserH264::Reset"; |
+ es_byte_queue_.Reset(); |
+ timing_desc_list_.clear(); |
+ es_pos_ = 0; |
+ current_nal_pos_ = -1; |
+ StartFrame(-1); |
+ last_video_decoder_config_ = VideoDecoderConfig(); |
+} |
+ |
+bool EsParserH264::ParseInternal() { |
+ int raw_es_size; |
+ const uint8* raw_es; |
+ es_byte_queue_.Peek(&raw_es, &raw_es_size); |
+ |
+ DCHECK_GE(es_pos_, 0); |
+ DCHECK_LT(es_pos_, raw_es_size); |
+ |
+ // Resume h264 es parsing where it was left. |
+ for ( ; es_pos_ < raw_es_size - 4; es_pos_++) { |
+ // Make sure the syncword is either 00 00 00 01 or 00 00 01 |
+ if (raw_es[es_pos_ + 0] != 0 || raw_es[es_pos_ + 1] != 0) |
+ continue; |
+ int syncword_length = 0; |
+ if (raw_es[es_pos_ + 2] == 0 && raw_es[es_pos_ + 3] == 1) |
+ syncword_length = 4; |
+ else if (raw_es[es_pos_ + 2] == 1) |
+ syncword_length = 3; |
+ else |
+ continue; |
+ |
+ // Parse the current NAL (and the new NAL then becomes the current one). |
+ if (current_nal_pos_ >= 0) { |
+ int nal_size = es_pos_ - current_nal_pos_; |
+ DCHECK_GT(nal_size, 0); |
+ RCHECK(NalParser(&raw_es[current_nal_pos_], nal_size)); |
+ } |
+ current_nal_pos_ = es_pos_ + syncword_length; |
+ |
+ // Retrieve the NAL type. |
+ int nal_header = raw_es[current_nal_pos_]; |
+ int forbidden_zero_bit = (nal_header >> 7) & 0x1; |
+ RCHECK(forbidden_zero_bit == 0); |
+ NalUnitType nal_unit_type = static_cast<NalUnitType>(nal_header & 0x1f); |
+ DVLOG(LOG_LEVEL_ES) << "nal: offset=" << es_pos_ |
+ << " type=" << nal_unit_type; |
+ |
+ // Emit a frame if needed. |
+ if (nal_unit_type == kNalUnitTypeAUD) |
+ EmitFrameIfNeeded(es_pos_); |
+ |
+ // Skip the syncword. |
+ es_pos_ += syncword_length; |
+ } |
+ |
+ return true; |
+} |
+ |
+void EsParserH264::EmitFrameIfNeeded(int next_aud_pos) { |
+ // There is no current frame: start a new frame. |
+ if (current_access_unit_pos_ < 0) { |
+ StartFrame(next_aud_pos); |
+ return; |
+ } |
+ |
+ // Get the access unit timing info. |
+ TimingDesc current_timing_desc; |
+ while (!timing_desc_list_.empty() && |
+ timing_desc_list_.front().first <= current_access_unit_pos_) { |
+ current_timing_desc = timing_desc_list_.front().second; |
+ timing_desc_list_.pop_front(); |
+ } |
+ |
+ // Emit a frame. |
+ int raw_es_size; |
+ const uint8* raw_es; |
+ es_byte_queue_.Peek(&raw_es, &raw_es_size); |
+ int access_unit_size = next_aud_pos - current_access_unit_pos_; |
+ scoped_refptr<StreamParserBuffer> stream_parser_buffer = |
+ StreamParserBuffer::CopyFrom( |
+ &raw_es[current_access_unit_pos_], |
+ access_unit_size, |
+ is_key_frame_); |
+ stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts); |
+ stream_parser_buffer->set_timestamp(current_timing_desc.pts); |
+ emit_buffer_cb_.Run(stream_parser_buffer); |
+ |
+ // Set the current frame position to the next AUD position. |
+ StartFrame(next_aud_pos); |
+} |
+ |
+void EsParserH264::StartFrame(int aud_pos) { |
+ // Two cases: |
+ // - if aud_pos < 0, clear the current frame and set |is_key_frame| to a |
+ // default value (false). |
+ // - if aud_pos >= 0, start a new frame and set |is_key_frame| to true |
+ // |is_key_frame_| will be updated while parsing the NALs of that frame. |
+ // If any NAL is a non IDR NAL, it will be set to false. |
+ current_access_unit_pos_ = aud_pos; |
+ is_key_frame_ = (aud_pos >= 0); |
+} |
+ |
+void EsParserH264::DiscardEs(int nbytes) { |
+ DCHECK_GE(nbytes, 0); |
+ if (nbytes == 0) |
+ return; |
+ |
+ // Update the position of |
+ // - the parser, |
+ // - the current NAL, |
+ // - the current access unit. |
+ es_pos_ -= nbytes; |
+ if (es_pos_ < 0) |
+ es_pos_ = 0; |
+ |
+ if (current_nal_pos_ >= 0) { |
+ DCHECK_GE(current_nal_pos_, nbytes); |
+ current_nal_pos_ -= nbytes; |
+ } |
+ if (current_access_unit_pos_ >= 0) { |
+ DCHECK_GE(current_access_unit_pos_, nbytes); |
+ current_access_unit_pos_ -= nbytes; |
+ } |
+ |
+ // Update the timing information accordingly. |
+ std::list<std::pair<int, TimingDesc> >::iterator timing_it |
+ = timing_desc_list_.begin(); |
+ for (; timing_it != timing_desc_list_.end(); ++timing_it) |
+ timing_it->first -= nbytes; |
+ |
+ // Discard |nbytes| of ES. |
+ es_byte_queue_.Pop(nbytes); |
+} |
+ |
+bool EsParserH264::NalParser(const uint8* buf, int size) { |
+ // Get the NAL header. |
+ if (size < 1) { |
+ DVLOG(1) << "NalParser: incomplete NAL"; |
+ return false; |
+ } |
+ int nal_header = buf[0]; |
+ buf += 1; |
+ size -= 1; |
+ |
+ int forbidden_zero_bit = (nal_header >> 7) & 0x1; |
+ if (forbidden_zero_bit != 0) |
+ return false; |
+ int nal_ref_idc = (nal_header >> 5) & 0x3; |
+ int nal_unit_type = nal_header & 0x1f; |
+ |
+ // Process the NAL content. |
+ switch (nal_unit_type) { |
+ case kNalUnitTypeSPS: |
+ DVLOG(LOG_LEVEL_ES) << "NAL: SPS"; |
+ // |nal_ref_idc| should not be 0 for a SPS. |
+ if (nal_ref_idc == 0) |
+ return false; |
+ return ProcessSPS(buf, size); |
+ case kNalUnitTypeIdrSlice: |
+ DVLOG(LOG_LEVEL_ES) << "NAL: IDR slice"; |
+ return true; |
+ case kNalUnitTypeNonIdrSlice: |
+ DVLOG(LOG_LEVEL_ES) << "NAL: Non IDR slice"; |
+ is_key_frame_ = false; |
+ return true; |
+ case kNalUnitTypePPS: |
+ DVLOG(LOG_LEVEL_ES) << "NAL: PPS"; |
+ return true; |
+ case kNalUnitTypeAUD: |
+ DVLOG(LOG_LEVEL_ES) << "NAL: AUD"; |
+ return true; |
+ default: |
+ DVLOG(LOG_LEVEL_ES) << "NAL: " << nal_unit_type; |
+ return true; |
+ } |
+ |
+ NOTREACHED(); |
+ return false; |
+} |
+ |
+bool EsParserH264::ProcessSPS(const uint8* buf, int size) { |
+ if (size <= 0) |
+ return false; |
+ |
+ // Removes start code emulation prevention. |
+ // TODO(damienv): refactoring in media/base |
+ // so as to have a unique H264 bit reader in Chrome. |
+ scoped_ptr<uint8[]> buf_rbsp(new uint8[size]); |
+ int rbsp_size = ConvertToRbsp(buf, size, buf_rbsp.get()); |
+ |
+ BitReaderH264 bit_reader(buf_rbsp.get(), rbsp_size); |
+ |
+ int profile_idc; |
+ int constraint_setX_flag; |
+ int level_idc; |
+ uint32 seq_parameter_set_id; |
+ uint32 log2_max_frame_num_minus4; |
+ uint32 pic_order_cnt_type; |
+ RCHECK(bit_reader.ReadBits(8, &profile_idc)); |
+ RCHECK(bit_reader.ReadBits(8, &constraint_setX_flag)); |
+ RCHECK(bit_reader.ReadBits(8, &level_idc)); |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&seq_parameter_set_id)); |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_frame_num_minus4)); |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&pic_order_cnt_type)); |
+ |
+ // |pic_order_cnt_type| shall be in the range of 0 to 2. |
+ RCHECK(pic_order_cnt_type <= 2); |
+ if (pic_order_cnt_type == 0) { |
+ uint32 log2_max_pic_order_cnt_lsb_minus4; |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_pic_order_cnt_lsb_minus4)); |
+ } else if (pic_order_cnt_type == 1) { |
+ // Note: |offset_for_non_ref_pic| and |offset_for_top_to_bottom_field| |
+ // corresponds to their codenum not to their actual value. |
+ bool delta_pic_order_always_zero_flag; |
+ uint32 offset_for_non_ref_pic; |
+ uint32 offset_for_top_to_bottom_field; |
+ uint32 num_ref_frames_in_pic_order_cnt_cycle; |
+ RCHECK(bit_reader.ReadBits(1, &delta_pic_order_always_zero_flag)); |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_non_ref_pic)); |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_top_to_bottom_field)); |
+ RCHECK( |
+ bit_reader.ReadBitsExpGolomb(&num_ref_frames_in_pic_order_cnt_cycle)); |
+ for (uint32 i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) { |
+ uint32 offset_for_ref_frame_codenum; |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_ref_frame_codenum)); |
+ } |
+ } |
+ |
+ uint32 num_ref_frames; |
+ int gaps_in_frame_num_value_allowed_flag; |
+ uint32 pic_width_in_mbs_minus1; |
+ uint32 pic_height_in_map_units_minus1; |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&num_ref_frames)); |
+ RCHECK(bit_reader.ReadBits(1, &gaps_in_frame_num_value_allowed_flag)); |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&pic_width_in_mbs_minus1)); |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&pic_height_in_map_units_minus1)); |
+ |
+ int frame_mbs_only_flag; |
+ RCHECK(bit_reader.ReadBits(1, &frame_mbs_only_flag)); |
+ if (!frame_mbs_only_flag) { |
+ int mb_adaptive_frame_field_flag; |
+ RCHECK(bit_reader.ReadBits(1, &mb_adaptive_frame_field_flag)); |
+ } |
+ |
+ int direct_8x8_inference_flag; |
+ RCHECK(bit_reader.ReadBits(1, &direct_8x8_inference_flag)); |
+ |
+ bool frame_cropping_flag; |
+ uint32 frame_crop_left_offset = 0; |
+ uint32 frame_crop_right_offset = 0; |
+ uint32 frame_crop_top_offset = 0; |
+ uint32 frame_crop_bottom_offset = 0; |
+ RCHECK(bit_reader.ReadBits(1, &frame_cropping_flag)); |
+ if (frame_cropping_flag) { |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_left_offset)); |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_right_offset)); |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_top_offset)); |
+ RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_bottom_offset)); |
+ } |
+ |
+ bool vui_parameters_present_flag; |
+ RCHECK(bit_reader.ReadBits(1, &vui_parameters_present_flag)); |
+ int sar_width = 1; |
+ int sar_height = 1; |
+ if (vui_parameters_present_flag) { |
+ // Read only the aspect ratio information from the VUI section. |
+ // TODO(damienv): check whether other VUI info are useful. |
+ bool aspect_ratio_info_present_flag = false; |
+ RCHECK(bit_reader.ReadBits(1, &aspect_ratio_info_present_flag)); |
+ if (aspect_ratio_info_present_flag) { |
+ int aspect_ratio_idc; |
+ RCHECK(bit_reader.ReadBits(8, &aspect_ratio_idc)); |
+ if (aspect_ratio_idc == kExtendedSar) { |
+ RCHECK(bit_reader.ReadBits(16, &sar_width)); |
+ RCHECK(bit_reader.ReadBits(16, &sar_height)); |
+ } else if (aspect_ratio_idc < 14) { |
+ sar_width = kTableSarWidth[aspect_ratio_idc]; |
+ sar_height = kTableSarHeight[aspect_ratio_idc]; |
+ } |
+ } |
+ } |
+ |
+ if (sar_width != sar_height) { |
+ // TODO(damienv): Support non square pixels. |
+ DVLOG(1) |
+ << "Non square pixel not supported yet:" |
+ << " sar_width=" << sar_width |
+ << " sar_height=" << sar_height; |
+ return false; |
+ } |
+ |
+ // TODO(damienv): a MAP unit can be either 16 or 32 pixels. |
+ // although it's 16 pixels for progressive non MBAFF frames. |
+ gfx::Size coded_size((pic_width_in_mbs_minus1 + 1) * 16, |
+ (pic_height_in_map_units_minus1 + 1) * 16); |
+ gfx::Rect visible_rect( |
+ frame_crop_left_offset, |
+ frame_crop_top_offset, |
+ (coded_size.width() - frame_crop_right_offset) - frame_crop_left_offset, |
+ (coded_size.height() - frame_crop_bottom_offset) - frame_crop_top_offset); |
+ |
+ // TODO(damienv): calculate the natural size based |
+ // on the possible aspect ratio coded in the VUI parameters. |
+ gfx::Size natural_size(visible_rect.width(), |
+ visible_rect.height()); |
+ |
+ // TODO(damienv): |
+ // Assuming the SPS is used right away by the PPS |
+ // and the slice headers is a strong assumption. |
+ // In theory, we should process the SPS and PPS |
+ // and only when one of the slice header is switching |
+ // the PPS id, the video decoder config should be changed. |
+ VideoDecoderConfig video_decoder_config( |
+ kCodecH264, |
+ VIDEO_CODEC_PROFILE_UNKNOWN, // TODO(damienv) |
+ VideoFrame::YV12, |
+ coded_size, |
+ visible_rect, |
+ natural_size, |
+ NULL, 0, |
+ false); |
+ |
+ if (!video_decoder_config.Matches(last_video_decoder_config_)) { |
+ DVLOG(1) << "Profile IDC: " << profile_idc; |
+ DVLOG(1) << "Level IDC: " << level_idc; |
+ DVLOG(1) << "Pic width: " << (pic_width_in_mbs_minus1 + 1) * 16; |
+ DVLOG(1) << "Pic height: " << (pic_height_in_map_units_minus1 + 1) * 16; |
+ DVLOG(1) << "log2_max_frame_num_minus4: " << log2_max_frame_num_minus4; |
+ last_video_decoder_config_ = video_decoder_config; |
+ new_video_config_cb_.Run(video_decoder_config); |
+ } |
+ |
+ return true; |
+} |
+ |
+} // namespace mp2t |
+} // namespace media |
+ |