Commit eb6677cc authored by Jeffrey Kardatzke, committed by Commit Bot

Add SPS parsing to H265 parser

This adds parsing of SPS from the H265 bitstream. To keep the size down
a little, it doesn't handle VUI and the other extension data yet.
That'll be in the next CL. The design follows the existing H264Parser.

BUG=chrome:1141237,b:153111783
TEST=media_unittests pass

Change-Id: If489ddb5a31fcdb323773a3f530719f51878a897
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2491925
Commit-Queue: Jeffrey Kardatzke <jkardatzke@google.com>
Reviewed-by: Sergey Volk <servolk@chromium.org>
Reviewed-by: Dale Curtis <dalecurtis@chromium.org>
Auto-Submit: Jeffrey Kardatzke <jkardatzke@google.com>
Cr-Commit-Position: refs/heads/master@{#820067}
parent d946d995
This diff is collapsed.
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "base/macros.h" #include "base/macros.h"
#include "media/base/media_export.h" #include "media/base/media_export.h"
#include "media/base/ranges.h" #include "media/base/ranges.h"
#include "media/base/video_color_space.h"
#include "media/video/h264_bit_reader.h" #include "media/video/h264_bit_reader.h"
#include "media/video/h264_parser.h" #include "media/video/h264_parser.h"
...@@ -107,6 +108,140 @@ struct MEDIA_EXPORT H265NALU { ...@@ -107,6 +108,140 @@ struct MEDIA_EXPORT H265NALU {
int nuh_temporal_id_plus1; int nuh_temporal_id_plus1;
}; };
// Upper bounds used by the H.265 parser structures in this header. Several of
// them size the fixed arrays in H265StRefPicSet and H265SPS (e.g.
// kMaxShortTermRefPicSets, kMaxLongTermRefPicSets and kMaxSubLayers).
// The trailing comment on each value names the section of the spec the limit
// is taken from.
enum {
kMaxLongTermRefPicSets = 32, // 7.4.3.2.1
kMaxShortTermRefPicSets = 64, // 7.4.3.2.1
kMaxSubLayers = 7, // 7.4.3.1 & 7.4.3.2.1 [v|s]ps_max_sub_layers_minus1 + 1
kMaxDpbSize = 16, // A.4.2
kMaxRefIdxActive = 15, // 7.4.7.1 num_ref_idx_l{0,1}_active_minus1 + 1
};
// Profile/tier/level information for an H.265 stream. An instance is embedded
// in H265SPS as |profile_tier_level| and is the output parameter of
// H265Parser::ParseProfileTierLevel().
struct MEDIA_EXPORT H265ProfileTierLevel {
  H265ProfileTierLevel();

  // From Annex A.3.
  // NOTE(review): presumably these are the values |general_profile_idc| can
  // take - confirm against the parser implementation.
  enum H265ProfileIdc {
    kProfileIdcMain = 1,
    kProfileIdcMain10 = 2,
    kProfileIdcMainStill = 3,
    kProfileIdcRangeExtensions = 4,
    kProfileIdcHighThroughput = 5,
    kProfileIdcScreenContentCoding = 9,
    kProfileIdcHighThroughputScreenContentCoding = 11,
  };
  // The enum above was originally (mis)named H264ProfileIdc even though it
  // lives in the H.265 parser. Keep the old name as an alias so existing
  // callers continue to compile; new code should use H265ProfileIdc.
  using H264ProfileIdc = H265ProfileIdc;

  // Syntax elements.
  int general_profile_idc;
  int general_level_idc;  // 30x the actual level.

  // From Table A.8 - General tier and level limits.
  int GetMaxLumaPs() const;
  // From A.4.2 - Profile-specific level limits for the video profiles.
  size_t GetDpbMaxPicBuf() const;
};
// Scaling list data for an H.265 stream. An instance is embedded in H265SPS as
// |scaling_list_data| and is the output parameter of
// H265Parser::ParseScalingListData().
struct MEDIA_EXPORT H265ScalingListData {
H265ScalingListData();
// Sizes and default value for the scaling list arrays declared below.
enum {
kDefaultScalingListSize0Values = 16, // Table 7-5, all values are 16
kScalingListSizeId0Count = 16, // 7.4.5
kScalingListSizeId1To3Count = 64, // 7.4.5
};
// TODO(jkardatzke): Optimize storage of the 32x32 since only indices 0 and 3
// are actually used. Also change it in the accelerator delegate if that is
// done.
// Syntax elements.
// Every array has a first dimension of 6; the 4x4 lists hold
// kScalingListSizeId0Count coefficients each and the larger lists hold
// kScalingListSizeId1To3Count coefficients each.
int scaling_list_dc_coef_16x16[6];
int scaling_list_dc_coef_32x32[6];
int scaling_list_4x4[6][kScalingListSizeId0Count];
int scaling_list_8x8[6][kScalingListSizeId1To3Count];
int scaling_list_16x16[6][kScalingListSizeId1To3Count];
int scaling_list_32x32[6][kScalingListSizeId1To3Count];
};
// One short-term reference picture set. H265SPS stores an array of up to
// kMaxShortTermRefPicSets of these in |st_ref_pic_set|, and
// H265Parser::ParseStRefPicSet() takes one as its output parameter.
struct MEDIA_EXPORT H265StRefPicSet {
H265StRefPicSet();
// Syntax elements.
int num_negative_pics;
int num_positive_pics;
// NOTE(review): the four per-picture arrays below are sized with
// kMaxShortTermRefPicSets (the limit on the number of sets). Confirm that the
// parser bounds num_negative_pics / num_positive_pics so that indexing these
// arrays cannot overflow.
int delta_poc_s0[kMaxShortTermRefPicSets];
int used_by_curr_pic_s0[kMaxShortTermRefPicSets];
int delta_poc_s1[kMaxShortTermRefPicSets];
int used_by_curr_pic_s1[kMaxShortTermRefPicSets];
// Calculated fields.
// NOTE(review): not a syntax element; presumably derived from the values
// above while parsing - confirm in the implementation.
int num_delta_pocs;
};
// Sequence parameter set (SPS) data produced by H265Parser::ParseSPS().
// Instances are owned by the parser and retrieved with H265Parser::GetSPS().
// NOTE(review): per the commit description, VUI and the other extension data
// are not parsed yet, so there are no fields for them here.
struct MEDIA_EXPORT H265SPS {
H265SPS();
// Syntax elements.
int sps_max_sub_layers_minus1;
bool sps_temporal_id_nesting_flag;
H265ProfileTierLevel profile_tier_level;
int sps_seq_parameter_set_id;
int chroma_format_idc;
bool separate_colour_plane_flag;
int pic_width_in_luma_samples;
int pic_height_in_luma_samples;
int conf_win_left_offset;
int conf_win_right_offset;
int conf_win_top_offset;
int conf_win_bottom_offset;
int bit_depth_luma_minus8;
int bit_depth_chroma_minus8;
int log2_max_pic_order_cnt_lsb_minus4;
// The three arrays below hold one entry per sub-layer and are sized for the
// maximum number of sub-layers (kMaxSubLayers).
int sps_max_dec_pic_buffering_minus1[kMaxSubLayers];
int sps_max_num_reorder_pics[kMaxSubLayers];
int sps_max_latency_increase_plus1[kMaxSubLayers];
int log2_min_luma_coding_block_size_minus3;
int log2_diff_max_min_luma_coding_block_size;
int log2_min_luma_transform_block_size_minus2;
int log2_diff_max_min_luma_transform_block_size;
int max_transform_hierarchy_depth_inter;
int max_transform_hierarchy_depth_intra;
bool scaling_list_enabled_flag;
bool sps_scaling_list_data_present_flag;
H265ScalingListData scaling_list_data;
bool amp_enabled_flag;
bool sample_adaptive_offset_enabled_flag;
bool pcm_enabled_flag;
int pcm_sample_bit_depth_luma_minus1;
int pcm_sample_bit_depth_chroma_minus1;
int log2_min_pcm_luma_coding_block_size_minus3;
int log2_diff_max_min_pcm_luma_coding_block_size;
bool pcm_loop_filter_disabled_flag;
int num_short_term_ref_pic_sets;
// Storage for up to kMaxShortTermRefPicSets short-term reference picture sets.
H265StRefPicSet st_ref_pic_set[kMaxShortTermRefPicSets];
bool long_term_ref_pics_present_flag;
int num_long_term_ref_pics_sps;
// Both long-term arrays have kMaxLongTermRefPicSets entries.
int lt_ref_pic_poc_lsb_sps[kMaxLongTermRefPicSets];
bool used_by_curr_pic_lt_sps_flag[kMaxLongTermRefPicSets];
bool sps_temporal_mvp_enabled_flag;
bool strong_intra_smoothing_enabled_flag;
// Calculated fields.
// NOTE(review): these are not syntax elements; presumably they are derived
// from the values above while parsing - confirm in the implementation.
int chroma_array_type;
int sub_width_c;
int sub_height_c;
size_t max_dpb_size;
int bit_depth_y;
int bit_depth_c;
int max_pic_order_cnt_lsb;
int sps_max_latency_pictures[kMaxSubLayers];
int ctb_log2_size_y;
int pic_width_in_ctbs_y;
int pic_height_in_ctbs_y;
int pic_size_in_ctbs_y;
int max_tb_log2_size_y;
int wp_offset_half_range_y;
int wp_offset_half_range_c;
};
// Class to parse an Annex-B H.265 stream. // Class to parse an Annex-B H.265 stream.
class MEDIA_EXPORT H265Parser { class MEDIA_EXPORT H265Parser {
public: public:
...@@ -138,6 +273,20 @@ class MEDIA_EXPORT H265Parser { ...@@ -138,6 +273,20 @@ class MEDIA_EXPORT H265Parser {
// again, instead of any NALU-type specific parse functions below. // again, instead of any NALU-type specific parse functions below.
Result AdvanceToNextNALU(H265NALU* nalu); Result AdvanceToNextNALU(H265NALU* nalu);
// NALU-specific parsing functions.
// These should be called after AdvanceToNextNALU().
// SPSes are owned by the parser class and the memory for their structures is
// managed here, not by the caller, as they are reused across NALUs.
//
// Parse an SPS NALU and save its data in the parser, returning the id of the
// parsed structure in |*sps_id|. To get a pointer to a given SPS structure,
// use GetSPS(), passing the returned |*sps_id| as parameter.
Result ParseSPS(int* sps_id);
// Return a pointer to SPS with given |sps_id| or null if not present.
const H265SPS* GetSPS(int sps_id) const;
private: private:
// Move the stream pointer to the beginning of the next NALU, // Move the stream pointer to the beginning of the next NALU,
// i.e. pointing at the next start code. // i.e. pointing at the next start code.
...@@ -148,6 +297,21 @@ class MEDIA_EXPORT H265Parser { ...@@ -148,6 +297,21 @@ class MEDIA_EXPORT H265Parser {
// - the size in bytes of the start code is returned in |*start_code_size|. // - the size in bytes of the start code is returned in |*start_code_size|.
bool LocateNALU(off_t* nalu_size, off_t* start_code_size); bool LocateNALU(off_t* nalu_size, off_t* start_code_size);
// Exp-Golomb code parsing as specified in chapter 9.2 of the spec.
// Read one unsigned exp-Golomb code from the stream and return in |*val|.
Result ReadUE(int* val);
// Read one signed exp-Golomb code from the stream and return in |*val|.
Result ReadSE(int* val);
Result ParseProfileTierLevel(bool profile_present,
int max_num_sub_layers_minus1,
H265ProfileTierLevel* profile_tier_level);
Result ParseScalingListData(H265ScalingListData* scaling_list_data);
Result ParseStRefPicSet(int st_rps_idx,
const H265SPS& sps,
H265StRefPicSet* st_ref_pic_set);
// Pointer to the current NALU in the stream. // Pointer to the current NALU in the stream.
const uint8_t* stream_; const uint8_t* stream_;
...@@ -156,8 +320,10 @@ class MEDIA_EXPORT H265Parser { ...@@ -156,8 +320,10 @@ class MEDIA_EXPORT H265Parser {
H264BitReader br_; H264BitReader br_;
// Ranges of encrypted bytes in the buffer passed to // SPSes stored for future reference.
// SetEncryptedStream(). std::map<int, std::unique_ptr<H265SPS>> active_sps_;
// Ranges of encrypted bytes in the buffer passed to SetEncryptedStream().
Ranges<const uint8_t*> encrypted_ranges_; Ranges<const uint8_t*> encrypted_ranges_;
DISALLOW_COPY_AND_ASSIGN(H265Parser); DISALLOW_COPY_AND_ASSIGN(H265Parser);
......
...@@ -37,6 +37,17 @@ TEST(H265ParserTest, RawHevcStreamFileParsing) { ...@@ -37,6 +37,17 @@ TEST(H265ParserTest, RawHevcStreamFileParsing) {
++num_parsed_nalus; ++num_parsed_nalus;
DVLOG(4) << "Found NALU " << nalu.nal_unit_type; DVLOG(4) << "Found NALU " << nalu.nal_unit_type;
switch (nalu.nal_unit_type) {
case H265NALU::SPS_NUT:
int sps_id;
res = parser.ParseSPS(&sps_id);
ASSERT_TRUE(!!parser.GetSPS(sps_id));
break;
default:
break;
}
ASSERT_EQ(res, H265Parser::kOk);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment