Merge pull request #313 from pengbins:fix_ts_h265reader_parse_sps

PiperOrigin-RevId: 527259619
(cherry picked from commit fab134f0b3)
This commit is contained in:
Ian Baker 2023-04-26 15:53:58 +01:00
parent 20924724fc
commit 31492031c1
11 changed files with 317 additions and 201 deletions

View file

@ -8,6 +8,10 @@
* Fix issue where last frame may not be rendered if the last sample with * Fix issue where last frame may not be rendered if the last sample with
frames is dequeued without reading the 'end of stream' sample. frames is dequeued without reading the 'end of stream' sample.
([#11079](https://github.com/google/ExoPlayer/issues/11079)). ([#11079](https://github.com/google/ExoPlayer/issues/11079)).
* Extractors:
* Fix parsing of H.265 SPS in MPEG-TS files by re-using the parsing logic
already used by RTSP and MP4 extractors
([#303](https://github.com/androidx/media/issues/303)).
* Session: * Session:
* Fix issue where `MediaController` doesn't update its available commands * Fix issue where `MediaController` doesn't update its available commands
when connected to a legacy `MediaSessionCompat` that updates its when connected to a legacy `MediaSessionCompat` that updates its

View file

@ -63,6 +63,7 @@ public class TsPlaybackTest {
"sample_h264_mpeg_audio.ts", "sample_h264_mpeg_audio.ts",
"sample_h264_no_access_unit_delimiters.ts", "sample_h264_no_access_unit_delimiters.ts",
"sample_h265.ts", "sample_h265.ts",
"sample_h265_rps_pred.ts",
"sample_latm.ts", "sample_latm.ts",
"sample_scte35.ts", "sample_scte35.ts",
"sample_with_id3.adts", "sample_with_id3.adts",

View file

@ -15,15 +15,12 @@
*/ */
package androidx.media3.extractor.ts; package androidx.media3.extractor.ts;
import static java.lang.Math.min;
import androidx.annotation.Nullable; import androidx.annotation.Nullable;
import androidx.media3.common.C; import androidx.media3.common.C;
import androidx.media3.common.Format; import androidx.media3.common.Format;
import androidx.media3.common.MimeTypes; import androidx.media3.common.MimeTypes;
import androidx.media3.common.util.Assertions; import androidx.media3.common.util.Assertions;
import androidx.media3.common.util.CodecSpecificDataUtil; import androidx.media3.common.util.CodecSpecificDataUtil;
import androidx.media3.common.util.Log;
import androidx.media3.common.util.ParsableByteArray; import androidx.media3.common.util.ParsableByteArray;
import androidx.media3.common.util.UnstableApi; import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util; import androidx.media3.common.util.Util;
@ -246,216 +243,30 @@ public final class H265Reader implements ElementaryStreamReader {
System.arraycopy(sps.nalData, 0, csdData, vps.nalLength, sps.nalLength); System.arraycopy(sps.nalData, 0, csdData, vps.nalLength, sps.nalLength);
System.arraycopy(pps.nalData, 0, csdData, vps.nalLength + sps.nalLength, pps.nalLength); System.arraycopy(pps.nalData, 0, csdData, vps.nalLength + sps.nalLength, pps.nalLength);
// Parse the SPS NAL unit, as per H.265/HEVC (2014) 7.3.2.2.1. // Skip the 3-byte NAL unit start code synthesised by the NalUnitTargetBuffer constructor.
ParsableNalUnitBitArray bitArray = new ParsableNalUnitBitArray(sps.nalData, 0, sps.nalLength); NalUnitUtil.H265SpsData spsData =
bitArray.skipBits(40 + 4); // NAL header, sps_video_parameter_set_id NalUnitUtil.parseH265SpsNalUnit(sps.nalData, /* nalOffset= */ 3, sps.nalLength);
int maxSubLayersMinus1 = bitArray.readBits(3);
bitArray.skipBit(); // sps_temporal_id_nesting_flag
int generalProfileSpace = bitArray.readBits(2);
boolean generalTierFlag = bitArray.readBit();
int generalProfileIdc = bitArray.readBits(5);
int generalProfileCompatibilityFlags = 0;
for (int i = 0; i < 32; i++) {
if (bitArray.readBit()) {
generalProfileCompatibilityFlags |= (1 << i);
}
}
int[] constraintBytes = new int[6];
for (int i = 0; i < constraintBytes.length; ++i) {
constraintBytes[i] = bitArray.readBits(8);
}
int generalLevelIdc = bitArray.readBits(8);
int toSkip = 0;
for (int i = 0; i < maxSubLayersMinus1; i++) {
if (bitArray.readBit()) { // sub_layer_profile_present_flag[i]
toSkip += 89;
}
if (bitArray.readBit()) { // sub_layer_level_present_flag[i]
toSkip += 8;
}
}
bitArray.skipBits(toSkip);
if (maxSubLayersMinus1 > 0) {
bitArray.skipBits(2 * (8 - maxSubLayersMinus1));
}
bitArray.readUnsignedExpGolombCodedInt(); // sps_seq_parameter_set_id
int chromaFormatIdc = bitArray.readUnsignedExpGolombCodedInt();
if (chromaFormatIdc == 3) {
bitArray.skipBit(); // separate_colour_plane_flag
}
int picWidthInLumaSamples = bitArray.readUnsignedExpGolombCodedInt();
int picHeightInLumaSamples = bitArray.readUnsignedExpGolombCodedInt();
if (bitArray.readBit()) { // conformance_window_flag
int confWinLeftOffset = bitArray.readUnsignedExpGolombCodedInt();
int confWinRightOffset = bitArray.readUnsignedExpGolombCodedInt();
int confWinTopOffset = bitArray.readUnsignedExpGolombCodedInt();
int confWinBottomOffset = bitArray.readUnsignedExpGolombCodedInt();
// H.265/HEVC (2014) Table 6-1
int subWidthC = chromaFormatIdc == 1 || chromaFormatIdc == 2 ? 2 : 1;
int subHeightC = chromaFormatIdc == 1 ? 2 : 1;
picWidthInLumaSamples -= subWidthC * (confWinLeftOffset + confWinRightOffset);
picHeightInLumaSamples -= subHeightC * (confWinTopOffset + confWinBottomOffset);
}
bitArray.readUnsignedExpGolombCodedInt(); // bit_depth_luma_minus8
bitArray.readUnsignedExpGolombCodedInt(); // bit_depth_chroma_minus8
int log2MaxPicOrderCntLsbMinus4 = bitArray.readUnsignedExpGolombCodedInt();
// for (i = sps_sub_layer_ordering_info_present_flag ? 0 : sps_max_sub_layers_minus1; ...)
for (int i = bitArray.readBit() ? 0 : maxSubLayersMinus1; i <= maxSubLayersMinus1; i++) {
bitArray.readUnsignedExpGolombCodedInt(); // sps_max_dec_pic_buffering_minus1[i]
bitArray.readUnsignedExpGolombCodedInt(); // sps_max_num_reorder_pics[i]
bitArray.readUnsignedExpGolombCodedInt(); // sps_max_latency_increase_plus1[i]
}
bitArray.readUnsignedExpGolombCodedInt(); // log2_min_luma_coding_block_size_minus3
bitArray.readUnsignedExpGolombCodedInt(); // log2_diff_max_min_luma_coding_block_size
bitArray.readUnsignedExpGolombCodedInt(); // log2_min_luma_transform_block_size_minus2
bitArray.readUnsignedExpGolombCodedInt(); // log2_diff_max_min_luma_transform_block_size
bitArray.readUnsignedExpGolombCodedInt(); // max_transform_hierarchy_depth_inter
bitArray.readUnsignedExpGolombCodedInt(); // max_transform_hierarchy_depth_intra
// if (scaling_list_enabled_flag) { if (sps_scaling_list_data_present_flag) {...}}
boolean scalingListEnabled = bitArray.readBit();
if (scalingListEnabled && bitArray.readBit()) {
skipScalingList(bitArray);
}
bitArray.skipBits(2); // amp_enabled_flag (1), sample_adaptive_offset_enabled_flag (1)
if (bitArray.readBit()) { // pcm_enabled_flag
// pcm_sample_bit_depth_luma_minus1 (4), pcm_sample_bit_depth_chroma_minus1 (4)
bitArray.skipBits(8);
bitArray.readUnsignedExpGolombCodedInt(); // log2_min_pcm_luma_coding_block_size_minus3
bitArray.readUnsignedExpGolombCodedInt(); // log2_diff_max_min_pcm_luma_coding_block_size
bitArray.skipBit(); // pcm_loop_filter_disabled_flag
}
// Skips all short term reference picture sets.
skipShortTermRefPicSets(bitArray);
if (bitArray.readBit()) { // long_term_ref_pics_present_flag
// num_long_term_ref_pics_sps
for (int i = 0; i < bitArray.readUnsignedExpGolombCodedInt(); i++) {
int ltRefPicPocLsbSpsLength = log2MaxPicOrderCntLsbMinus4 + 4;
// lt_ref_pic_poc_lsb_sps[i], used_by_curr_pic_lt_sps_flag[i]
bitArray.skipBits(ltRefPicPocLsbSpsLength + 1);
}
}
bitArray.skipBits(2); // sps_temporal_mvp_enabled_flag, strong_intra_smoothing_enabled_flag
float pixelWidthHeightRatio = 1;
if (bitArray.readBit()) { // vui_parameters_present_flag
if (bitArray.readBit()) { // aspect_ratio_info_present_flag
int aspectRatioIdc = bitArray.readBits(8);
if (aspectRatioIdc == NalUnitUtil.EXTENDED_SAR) {
int sarWidth = bitArray.readBits(16);
int sarHeight = bitArray.readBits(16);
if (sarWidth != 0 && sarHeight != 0) {
pixelWidthHeightRatio = (float) sarWidth / sarHeight;
}
} else if (aspectRatioIdc < NalUnitUtil.ASPECT_RATIO_IDC_VALUES.length) {
pixelWidthHeightRatio = NalUnitUtil.ASPECT_RATIO_IDC_VALUES[aspectRatioIdc];
} else {
Log.w(TAG, "Unexpected aspect_ratio_idc value: " + aspectRatioIdc);
}
}
if (bitArray.readBit()) { // overscan_info_present_flag
bitArray.skipBit(); // overscan_appropriate_flag
}
if (bitArray.readBit()) { // video_signal_type_present_flag
bitArray.skipBits(4); // video_format, video_full_range_flag
if (bitArray.readBit()) { // colour_description_present_flag
// colour_primaries, transfer_characteristics, matrix_coeffs
bitArray.skipBits(24);
}
}
if (bitArray.readBit()) { // chroma_loc_info_present_flag
bitArray.readUnsignedExpGolombCodedInt(); // chroma_sample_loc_type_top_field
bitArray.readUnsignedExpGolombCodedInt(); // chroma_sample_loc_type_bottom_field
}
bitArray.skipBit(); // neutral_chroma_indication_flag
if (bitArray.readBit()) { // field_seq_flag
// field_seq_flag equal to 1 indicates that the coded video sequence conveys pictures that
// represent fields, which means that frame height is double the picture height.
picHeightInLumaSamples *= 2;
}
}
String codecs = String codecs =
CodecSpecificDataUtil.buildHevcCodecString( CodecSpecificDataUtil.buildHevcCodecString(
generalProfileSpace, spsData.generalProfileSpace,
generalTierFlag, spsData.generalTierFlag,
generalProfileIdc, spsData.generalProfileIdc,
generalProfileCompatibilityFlags, spsData.generalProfileCompatibilityFlags,
constraintBytes, spsData.constraintBytes,
generalLevelIdc); spsData.generalLevelIdc);
return new Format.Builder() return new Format.Builder()
.setId(formatId) .setId(formatId)
.setSampleMimeType(MimeTypes.VIDEO_H265) .setSampleMimeType(MimeTypes.VIDEO_H265)
.setCodecs(codecs) .setCodecs(codecs)
.setWidth(picWidthInLumaSamples) .setWidth(spsData.width)
.setHeight(picHeightInLumaSamples) .setHeight(spsData.height)
.setPixelWidthHeightRatio(pixelWidthHeightRatio) .setPixelWidthHeightRatio(spsData.pixelWidthHeightRatio)
.setInitializationData(Collections.singletonList(csdData)) .setInitializationData(Collections.singletonList(csdData))
.build(); .build();
} }
/**
 * Advances {@code bitArray} past a scaling_list_data() structure without retaining any of its
 * values. See H.265/HEVC (2014) 7.3.4.
 *
 * @param bitArray The bit array, positioned at the start of scaling_list_data().
 */
private static void skipScalingList(ParsableNalUnitBitArray bitArray) {
  for (int sizeId = 0; sizeId < 4; sizeId++) {
    // These three values depend on sizeId only, so they are computed once per size.
    int matrixIdStep = sizeId == 3 ? 3 : 1;
    int coefficientCount = Math.min(64, 1 << (4 + 2 * sizeId));
    boolean hasDcCoefficient = sizeId > 1;
    for (int matrixId = 0; matrixId < 6; matrixId += matrixIdStep) {
      boolean scalingListPredModeFlag = bitArray.readBit();
      if (!scalingListPredModeFlag) {
        // The list is predicted from a reference list: only
        // scaling_list_pred_matrix_id_delta[sizeId][matrixId] is present.
        bitArray.readUnsignedExpGolombCodedInt();
        continue;
      }
      if (hasDcCoefficient) {
        bitArray.readSignedExpGolombCodedInt(); // scaling_list_dc_coef_minus8[sizeId - 2][matrixId]
      }
      int remainingCoefficients = coefficientCount;
      while (remainingCoefficients > 0) {
        bitArray.readSignedExpGolombCodedInt(); // scaling_list_delta_coef
        remainingCoefficients--;
      }
    }
  }
}
/**
 * Reads the number of short term reference picture sets in a SPS as ue(v), then skips all of
 * them. See H.265/HEVC (2014) 7.3.7 for the syntax and 7.4.8 for the derived variables.
 *
 * <p>An inter-predicted set carries {@code NumDeltaPocs[RefRpsIdx] + 1} flag entries, and
 * use_delta_flag[j] is only present when used_by_curr_pic_flag[j] is 0. To know how many entries
 * the next predicted set holds, NumNegativePics and NumPositivePics are derived for every set,
 * including predicted ones.
 *
 * @param bitArray The bit array, positioned at num_short_term_ref_pic_sets. On return it is
 *     positioned immediately after the last st_ref_pic_set().
 */
private static void skipShortTermRefPicSets(ParsableNalUnitBitArray bitArray) {
  int numShortTermRefPicSets = bitArray.readUnsignedExpGolombCodedInt();
  // As this method applies in a SPS, delta_idx_minus1 is not present and is inferred to be zero,
  // so RefRpsIdx = stRpsIdx - 1. Only the state of the previous set has to be kept.
  int previousNumNegativePics = 0;
  int previousNumPositivePics = 0;
  int[] previousDeltaPocS0 = new int[0];
  int[] previousDeltaPocS1 = new int[0];
  for (int stRpsIdx = 0; stRpsIdx < numShortTermRefPicSets; stRpsIdx++) {
    int numNegativePics;
    int numPositivePics;
    int[] deltaPocS0;
    int[] deltaPocS1;
    // inter_ref_pic_set_prediction_flag is not present (inferred to be 0) for the first set.
    boolean interRefPicSetPredictionFlag = stRpsIdx != 0 && bitArray.readBit();
    if (interRefPicSetPredictionFlag) {
      int previousNumDeltaPocs = previousNumNegativePics + previousNumPositivePics;
      boolean deltaRpsSign = bitArray.readBit(); // delta_rps_sign
      int absDeltaRps = bitArray.readUnsignedExpGolombCodedInt() + 1; // abs_delta_rps_minus1 + 1
      int deltaRps = deltaRpsSign ? -absDeltaRps : absDeltaRps;

      boolean[] useDeltaFlags = new boolean[previousNumDeltaPocs + 1];
      for (int j = 0; j <= previousNumDeltaPocs; j++) {
        boolean usedByCurrPicFlag = bitArray.readBit(); // used_by_curr_pic_flag[j]
        // use_delta_flag[j] is only coded when used_by_curr_pic_flag[j] is 0; when it is not
        // present its value is inferred to be 1.
        useDeltaFlags[j] = usedByCurrPicFlag || bitArray.readBit();
      }

      // Derive DeltaPocS0 and NumNegativePics as per equation (7-61). At most
      // previousNumDeltaPocs + 1 entries can be produced.
      deltaPocS0 = new int[previousNumDeltaPocs + 1];
      numNegativePics = 0;
      for (int j = previousNumPositivePics - 1; j >= 0; j--) {
        int dPoc = previousDeltaPocS1[j] + deltaRps;
        if (dPoc < 0 && useDeltaFlags[previousNumNegativePics + j]) {
          deltaPocS0[numNegativePics++] = dPoc;
        }
      }
      if (deltaRps < 0 && useDeltaFlags[previousNumDeltaPocs]) {
        deltaPocS0[numNegativePics++] = deltaRps;
      }
      for (int j = 0; j < previousNumNegativePics; j++) {
        int dPoc = previousDeltaPocS0[j] + deltaRps;
        if (dPoc < 0 && useDeltaFlags[j]) {
          deltaPocS0[numNegativePics++] = dPoc;
        }
      }

      // Derive DeltaPocS1 and NumPositivePics as per equation (7-62).
      deltaPocS1 = new int[previousNumDeltaPocs + 1];
      numPositivePics = 0;
      for (int j = previousNumNegativePics - 1; j >= 0; j--) {
        int dPoc = previousDeltaPocS0[j] + deltaRps;
        if (dPoc > 0 && useDeltaFlags[j]) {
          deltaPocS1[numPositivePics++] = dPoc;
        }
      }
      if (deltaRps > 0 && useDeltaFlags[previousNumDeltaPocs]) {
        deltaPocS1[numPositivePics++] = deltaRps;
      }
      for (int j = 0; j < previousNumPositivePics; j++) {
        int dPoc = previousDeltaPocS1[j] + deltaRps;
        if (dPoc > 0 && useDeltaFlags[previousNumNegativePics + j]) {
          deltaPocS1[numPositivePics++] = dPoc;
        }
      }
    } else {
      numNegativePics = bitArray.readUnsignedExpGolombCodedInt();
      numPositivePics = bitArray.readUnsignedExpGolombCodedInt();
      deltaPocS0 = new int[numNegativePics];
      for (int i = 0; i < numNegativePics; i++) {
        int deltaPocS0Minus1 = bitArray.readUnsignedExpGolombCodedInt(); // delta_poc_s0_minus1[i]
        deltaPocS0[i] = (i > 0 ? deltaPocS0[i - 1] : 0) - (deltaPocS0Minus1 + 1);
        bitArray.skipBit(); // used_by_curr_pic_s0_flag[i]
      }
      deltaPocS1 = new int[numPositivePics];
      for (int i = 0; i < numPositivePics; i++) {
        int deltaPocS1Minus1 = bitArray.readUnsignedExpGolombCodedInt(); // delta_poc_s1_minus1[i]
        deltaPocS1[i] = (i > 0 ? deltaPocS1[i - 1] : 0) + (deltaPocS1Minus1 + 1);
        bitArray.skipBit(); // used_by_curr_pic_s1_flag[i]
      }
    }
    // Only the first numNegativePics / numPositivePics entries of the arrays are meaningful.
    previousNumNegativePics = numNegativePics;
    previousNumPositivePics = numPositivePics;
    previousDeltaPocS0 = deltaPocS0;
    previousDeltaPocS1 = deltaPocS1;
  }
}
@EnsuresNonNull({"output", "sampleReader"}) @EnsuresNonNull({"output", "sampleReader"})
private void assertTracksCreated() { private void assertTracksCreated() {
Assertions.checkStateNotNull(output); Assertions.checkStateNotNull(output);

View file

@ -92,6 +92,12 @@ public final class TsExtractorTest {
ExtractorAsserts.assertBehavior(TsExtractor::new, "media/ts/sample_h265.ts", simulationConfig); ExtractorAsserts.assertBehavior(TsExtractor::new, "media/ts/sample_h265.ts", simulationConfig);
} }
/** Runs the extractor behavior assertions against the {@code sample_h265_rps_pred.ts} asset. */
@Test
public void sampleWithH265RpsPred() throws Exception {
  String sampleFile = "media/ts/sample_h265_rps_pred.ts";
  ExtractorAsserts.assertBehavior(TsExtractor::new, sampleFile, simulationConfig);
}
@Test @Test
public void sampleWithScte35() throws Exception { public void sampleWithScte35() throws Exception {
ExtractorAsserts.assertBehavior( ExtractorAsserts.assertBehavior(

View file

@ -0,0 +1,81 @@
seekMap:
isSeekable = true
duration = 1000000
getPosition(0) = [[timeUs=0, position=0]]
getPosition(1) = [[timeUs=1, position=0]]
getPosition(500000) = [[timeUs=500000, position=7134]]
getPosition(1000000) = [[timeUs=1000000, position=14457]]
numberOfTracks = 1
track 256:
total output bytes = 10004
sample count = 15
format 0:
id = 1/256
sampleMimeType = video/hevc
codecs = hvc1.1.6.L63.90
width = 914
height = 686
pixelWidthHeightRatio = 1.0003651
initializationData:
data = length 146, hash 61554FEF
sample 0:
time = 266666
flags = 1
data = length 7464, hash EBF8518B
sample 1:
time = 1200000
flags = 0
data = length 1042, hash F69C93E1
sample 2:
time = 733333
flags = 0
data = length 465, hash 2B469969
sample 3:
time = 466666
flags = 0
data = length 177, hash 79777966
sample 4:
time = 333333
flags = 0
data = length 65, hash 63DA4886
sample 5:
time = 400000
flags = 0
data = length 33, hash EFE759C6
sample 6:
time = 600000
flags = 0
data = length 88, hash 98333D02
sample 7:
time = 533333
flags = 0
data = length 49, hash F9A023E1
sample 8:
time = 666666
flags = 0
data = length 58, hash 74F1E9D9
sample 9:
time = 933333
flags = 0
data = length 114, hash FA033C4D
sample 10:
time = 800000
flags = 0
data = length 87, hash 1A1C57E4
sample 11:
time = 866666
flags = 0
data = length 65, hash 59F937BE
sample 12:
time = 1066666
flags = 0
data = length 94, hash 5D02AC81
sample 13:
time = 1000000
flags = 0
data = length 57, hash 2750D207
sample 14:
time = 1133333
flags = 0
data = length 46, hash CE770A40
tracksEnded = true

View file

@ -0,0 +1,65 @@
seekMap:
isSeekable = true
duration = 1000000
getPosition(0) = [[timeUs=0, position=0]]
getPosition(1) = [[timeUs=1, position=0]]
getPosition(500000) = [[timeUs=500000, position=7134]]
getPosition(1000000) = [[timeUs=1000000, position=14457]]
numberOfTracks = 1
track 256:
total output bytes = 856
sample count = 11
format 0:
id = 1/256
sampleMimeType = video/hevc
codecs = hvc1.1.6.L63.90
width = 914
height = 686
pixelWidthHeightRatio = 1.0003651
initializationData:
data = length 146, hash 61554FEF
sample 0:
time = 333333
flags = 0
data = length 65, hash 63DA4886
sample 1:
time = 400000
flags = 0
data = length 33, hash EFE759C6
sample 2:
time = 600000
flags = 0
data = length 88, hash 98333D02
sample 3:
time = 533333
flags = 0
data = length 49, hash F9A023E1
sample 4:
time = 666666
flags = 0
data = length 58, hash 74F1E9D9
sample 5:
time = 933333
flags = 0
data = length 114, hash FA033C4D
sample 6:
time = 800000
flags = 0
data = length 87, hash 1A1C57E4
sample 7:
time = 866666
flags = 0
data = length 65, hash 59F937BE
sample 8:
time = 1066666
flags = 0
data = length 94, hash 5D02AC81
sample 9:
time = 1000000
flags = 0
data = length 57, hash 2750D207
sample 10:
time = 1133333
flags = 0
data = length 46, hash CE770A40
tracksEnded = true

View file

@ -0,0 +1,45 @@
seekMap:
isSeekable = true
duration = 1000000
getPosition(0) = [[timeUs=0, position=0]]
getPosition(1) = [[timeUs=1, position=0]]
getPosition(500000) = [[timeUs=500000, position=7134]]
getPosition(1000000) = [[timeUs=1000000, position=14457]]
numberOfTracks = 1
track 256:
total output bytes = 563
sample count = 6
format 0:
id = 1/256
sampleMimeType = video/hevc
codecs = hvc1.1.6.L63.90
width = 914
height = 686
pixelWidthHeightRatio = 1.0003651
initializationData:
data = length 146, hash 61554FEF
sample 0:
time = 933333
flags = 0
data = length 114, hash FA033C4D
sample 1:
time = 800000
flags = 0
data = length 87, hash 1A1C57E4
sample 2:
time = 866666
flags = 0
data = length 65, hash 59F937BE
sample 3:
time = 1066666
flags = 0
data = length 94, hash 5D02AC81
sample 4:
time = 1000000
flags = 0
data = length 57, hash 2750D207
sample 5:
time = 1133333
flags = 0
data = length 46, hash CE770A40
tracksEnded = true

View file

@ -0,0 +1,25 @@
seekMap:
isSeekable = true
duration = 1000000
getPosition(0) = [[timeUs=0, position=0]]
getPosition(1) = [[timeUs=1, position=0]]
getPosition(500000) = [[timeUs=500000, position=7134]]
getPosition(1000000) = [[timeUs=1000000, position=14457]]
numberOfTracks = 1
track 256:
total output bytes = 146
sample count = 1
format 0:
id = 1/256
sampleMimeType = video/hevc
codecs = hvc1.1.6.L63.90
width = 914
height = 686
pixelWidthHeightRatio = 1.0003651
initializationData:
data = length 146, hash 61554FEF
sample 0:
time = 1133333
flags = 0
data = length 46, hash CE770A40
tracksEnded = true

View file

@ -0,0 +1,78 @@
seekMap:
isSeekable = false
duration = UNSET TIME
getPosition(0) = [[timeUs=0, position=0]]
numberOfTracks = 1
track 256:
total output bytes = 10004
sample count = 15
format 0:
id = 1/256
sampleMimeType = video/hevc
codecs = hvc1.1.6.L63.90
width = 914
height = 686
pixelWidthHeightRatio = 1.0003651
initializationData:
data = length 146, hash 61554FEF
sample 0:
time = 266666
flags = 1
data = length 7464, hash EBF8518B
sample 1:
time = 1200000
flags = 0
data = length 1042, hash F69C93E1
sample 2:
time = 733333
flags = 0
data = length 465, hash 2B469969
sample 3:
time = 466666
flags = 0
data = length 177, hash 79777966
sample 4:
time = 333333
flags = 0
data = length 65, hash 63DA4886
sample 5:
time = 400000
flags = 0
data = length 33, hash EFE759C6
sample 6:
time = 600000
flags = 0
data = length 88, hash 98333D02
sample 7:
time = 533333
flags = 0
data = length 49, hash F9A023E1
sample 8:
time = 666666
flags = 0
data = length 58, hash 74F1E9D9
sample 9:
time = 933333
flags = 0
data = length 114, hash FA033C4D
sample 10:
time = 800000
flags = 0
data = length 87, hash 1A1C57E4
sample 11:
time = 866666
flags = 0
data = length 65, hash 59F937BE
sample 12:
time = 1066666
flags = 0
data = length 94, hash 5D02AC81
sample 13:
time = 1000000
flags = 0
data = length 57, hash 2750D207
sample 14:
time = 1133333
flags = 0
data = length 46, hash CE770A40
tracksEnded = true