mirror of
https://github.com/samsonjs/media.git
synced 2026-03-25 09:25:53 +00:00
Handle out-of-order frames in endIndices for MP4 with edit list
Update the logic to walk forward in the timestamps array so that all frames within the valid edit duration are included, accounting for out-of-order frames. This ensures that no frames with timestamps less than `editMediaTime` + `editDuration` are incorrectly excluded.
Issue: androidx/media#1797
PiperOrigin-RevId: 686075680
This commit is contained in:
parent
9adb3aaf41
commit
91c56335ef
13 changed files with 152 additions and 39 deletions
|
|
@ -78,6 +78,8 @@
|
|||
timescale, `media_time` is now properly scaled using the track
|
||||
timescale, as specified by the MP4 format standard
|
||||
([#1792](https://github.com/androidx/media/issues/1792)).
|
||||
* Handle out-of-order frames in `endIndices` calculation for MP4 with edit
|
||||
list ([#1797](https://github.com/androidx/media/issues/1797)).
|
||||
* DataSource:
|
||||
* Audio:
|
||||
* Fix pop sounds that may occur during seeks.
|
||||
|
|
|
|||
|
|
@ -717,22 +717,39 @@ public final class BoxParser {
|
|||
Util.scaleLargeTimestamp(
|
||||
track.editListDurations[i], track.timescale, track.movieTimescale);
|
||||
// The timestamps array is in the order read from the media, which might not be strictly
|
||||
// sorted, but will ensure that a) all sync frames are in-order and b) any out-of-order
|
||||
// frames are after their respective sync frames. This means that although the result of
|
||||
// this binary search might be slightly incorrect (due to out-of-order timestamps), the loop
|
||||
// below that walks backward to find the previous sync frame will result in a correct start
|
||||
// index.
|
||||
// sorted. However, all sync frames are guaranteed to be in order, and any out-of-order
|
||||
// frames appear after their respective sync frames. This ensures that although the result
|
||||
// of the binary search might not be entirely accurate (due to the out-of-order timestamps),
|
||||
// the following logic ensures correctness for both start and end indices.
|
||||
//
|
||||
// The startIndices calculation finds the largest timestamp that is less than or equal to
|
||||
// editMediaTime. It then walks backward to ensure the index points to a sync frame, since
|
||||
// decoding must start from a keyframe.
|
||||
startIndices[i] =
|
||||
Util.binarySearchFloor(
|
||||
timestamps, editMediaTime, /* inclusive= */ true, /* stayInBounds= */ true);
|
||||
while (startIndices[i] >= 0 && (flags[startIndices[i]] & C.BUFFER_FLAG_KEY_FRAME) == 0) {
|
||||
startIndices[i]--;
|
||||
}
|
||||
// The endIndices calculation finds the smallest timestamp that is greater than
|
||||
// editMediaTime + editDuration, except when omitZeroDurationClippedSample is true, in which
|
||||
// case it finds the smallest timestamp that is greater than or equal to editMediaTime +
|
||||
// editDuration.
|
||||
endIndices[i] =
|
||||
Util.binarySearchCeil(
|
||||
timestamps,
|
||||
editMediaTime + editDuration,
|
||||
/* inclusive= */ omitZeroDurationClippedSample,
|
||||
/* stayInBounds= */ false);
|
||||
while (startIndices[i] >= 0 && (flags[startIndices[i]] & C.BUFFER_FLAG_KEY_FRAME) == 0) {
|
||||
startIndices[i]--;
|
||||
if (track.type == C.TRACK_TYPE_VIDEO) {
|
||||
// To account for out-of-order video frames that may have timestamps smaller than or equal
|
||||
// to editMediaTime + editDuration, but still fall within the valid range, the loop walks
|
||||
// forward through the timestamps array to ensure all frames with timestamps within the
|
||||
// edit duration are included.
|
||||
while (endIndices[i] < timestamps.length - 1
|
||||
&& timestamps[endIndices[i] + 1] <= (editMediaTime + editDuration)) {
|
||||
endIndices[i]++;
|
||||
}
|
||||
}
|
||||
editedSampleCount += endIndices[i] - startIndices[i];
|
||||
copyMetadata |= nextSampleIndex != startIndices[i];
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ seekMap:
|
|||
getPosition(2548333) = [[timeUs=1680000, position=34939]]
|
||||
numberOfTracks = 2
|
||||
track 0:
|
||||
total output bytes = 3112471
|
||||
sample count = 83
|
||||
total output bytes = 3208515
|
||||
sample count = 85
|
||||
format 0:
|
||||
id = 1
|
||||
sampleMimeType = video/dolby-vision
|
||||
|
|
@ -358,8 +358,16 @@ track 0:
|
|||
data = length 23136, hash 8AF1C1AD
|
||||
sample 82:
|
||||
time = 2446666
|
||||
flags = 536870912
|
||||
flags = 0
|
||||
data = length 26792, hash 3157758F
|
||||
sample 83:
|
||||
time = 2613333
|
||||
flags = 0
|
||||
data = length 62711, hash EF9AC8F5
|
||||
sample 84:
|
||||
time = 2546666
|
||||
flags = 536870912
|
||||
data = length 33333, hash 567D33D6
|
||||
track 1:
|
||||
total output bytes = 45765
|
||||
sample count = 112
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ seekMap:
|
|||
getPosition(2548333) = [[timeUs=1680000, position=34939]]
|
||||
numberOfTracks = 2
|
||||
track 0:
|
||||
total output bytes = 2168517
|
||||
sample count = 60
|
||||
total output bytes = 2264561
|
||||
sample count = 62
|
||||
format 0:
|
||||
id = 1
|
||||
sampleMimeType = video/dolby-vision
|
||||
|
|
@ -266,8 +266,16 @@ track 0:
|
|||
data = length 23136, hash 8AF1C1AD
|
||||
sample 59:
|
||||
time = 2446666
|
||||
flags = 536870912
|
||||
flags = 0
|
||||
data = length 26792, hash 3157758F
|
||||
sample 60:
|
||||
time = 2613333
|
||||
flags = 0
|
||||
data = length 62711, hash EF9AC8F5
|
||||
sample 61:
|
||||
time = 2546666
|
||||
flags = 536870912
|
||||
data = length 33333, hash 567D33D6
|
||||
track 1:
|
||||
total output bytes = 30664
|
||||
sample count = 76
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ seekMap:
|
|||
getPosition(2548333) = [[timeUs=1680000, position=34939]]
|
||||
numberOfTracks = 2
|
||||
track 0:
|
||||
total output bytes = 1019852
|
||||
sample count = 28
|
||||
total output bytes = 1115896
|
||||
sample count = 30
|
||||
format 0:
|
||||
id = 1
|
||||
sampleMimeType = video/dolby-vision
|
||||
|
|
@ -138,8 +138,16 @@ track 0:
|
|||
data = length 23136, hash 8AF1C1AD
|
||||
sample 27:
|
||||
time = 2446666
|
||||
flags = 536870912
|
||||
flags = 0
|
||||
data = length 26792, hash 3157758F
|
||||
sample 28:
|
||||
time = 2613333
|
||||
flags = 0
|
||||
data = length 62711, hash EF9AC8F5
|
||||
sample 29:
|
||||
time = 2546666
|
||||
flags = 536870912
|
||||
data = length 33333, hash 567D33D6
|
||||
track 1:
|
||||
total output bytes = 15570
|
||||
sample count = 39
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ seekMap:
|
|||
getPosition(2548333) = [[timeUs=1680000, position=34939]]
|
||||
numberOfTracks = 2
|
||||
track 0:
|
||||
total output bytes = 1019852
|
||||
sample count = 28
|
||||
total output bytes = 1115896
|
||||
sample count = 30
|
||||
format 0:
|
||||
id = 1
|
||||
sampleMimeType = video/dolby-vision
|
||||
|
|
@ -138,8 +138,16 @@ track 0:
|
|||
data = length 23136, hash 8AF1C1AD
|
||||
sample 27:
|
||||
time = 2446666
|
||||
flags = 536870912
|
||||
flags = 0
|
||||
data = length 26792, hash 3157758F
|
||||
sample 28:
|
||||
time = 2613333
|
||||
flags = 0
|
||||
data = length 62711, hash EF9AC8F5
|
||||
sample 29:
|
||||
time = 2546666
|
||||
flags = 536870912
|
||||
data = length 33333, hash 567D33D6
|
||||
track 1:
|
||||
total output bytes = 1239
|
||||
sample count = 3
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ seekMap:
|
|||
getPosition(2548333) = [[timeUs=1680000, position=34939]]
|
||||
numberOfTracks = 2
|
||||
track 0:
|
||||
total output bytes = 3112471
|
||||
sample count = 83
|
||||
total output bytes = 3208515
|
||||
sample count = 85
|
||||
format 0:
|
||||
id = 1
|
||||
sampleMimeType = video/dolby-vision
|
||||
|
|
@ -358,8 +358,16 @@ track 0:
|
|||
data = length 23136, hash 8AF1C1AD
|
||||
sample 82:
|
||||
time = 2446666
|
||||
flags = 536870912
|
||||
flags = 0
|
||||
data = length 26792, hash 3157758F
|
||||
sample 83:
|
||||
time = 2613333
|
||||
flags = 0
|
||||
data = length 62711, hash EF9AC8F5
|
||||
sample 84:
|
||||
time = 2546666
|
||||
flags = 536870912
|
||||
data = length 33333, hash 567D33D6
|
||||
track 1:
|
||||
total output bytes = 45765
|
||||
sample count = 112
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ seekMap:
|
|||
getPosition(2548333) = [[timeUs=1680000, position=34939]]
|
||||
numberOfTracks = 2
|
||||
track 0:
|
||||
total output bytes = 2168517
|
||||
sample count = 60
|
||||
total output bytes = 2264561
|
||||
sample count = 62
|
||||
format 0:
|
||||
id = 1
|
||||
sampleMimeType = video/dolby-vision
|
||||
|
|
@ -266,8 +266,16 @@ track 0:
|
|||
data = length 23136, hash 8AF1C1AD
|
||||
sample 59:
|
||||
time = 2446666
|
||||
flags = 536870912
|
||||
flags = 0
|
||||
data = length 26792, hash 3157758F
|
||||
sample 60:
|
||||
time = 2613333
|
||||
flags = 0
|
||||
data = length 62711, hash EF9AC8F5
|
||||
sample 61:
|
||||
time = 2546666
|
||||
flags = 536870912
|
||||
data = length 33333, hash 567D33D6
|
||||
track 1:
|
||||
total output bytes = 30664
|
||||
sample count = 76
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ seekMap:
|
|||
getPosition(2548333) = [[timeUs=1680000, position=34939]]
|
||||
numberOfTracks = 2
|
||||
track 0:
|
||||
total output bytes = 1019852
|
||||
sample count = 28
|
||||
total output bytes = 1115896
|
||||
sample count = 30
|
||||
format 0:
|
||||
id = 1
|
||||
sampleMimeType = video/dolby-vision
|
||||
|
|
@ -138,8 +138,16 @@ track 0:
|
|||
data = length 23136, hash 8AF1C1AD
|
||||
sample 27:
|
||||
time = 2446666
|
||||
flags = 536870912
|
||||
flags = 0
|
||||
data = length 26792, hash 3157758F
|
||||
sample 28:
|
||||
time = 2613333
|
||||
flags = 0
|
||||
data = length 62711, hash EF9AC8F5
|
||||
sample 29:
|
||||
time = 2546666
|
||||
flags = 536870912
|
||||
data = length 33333, hash 567D33D6
|
||||
track 1:
|
||||
total output bytes = 15570
|
||||
sample count = 39
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ seekMap:
|
|||
getPosition(2548333) = [[timeUs=1680000, position=34939]]
|
||||
numberOfTracks = 2
|
||||
track 0:
|
||||
total output bytes = 1019852
|
||||
sample count = 28
|
||||
total output bytes = 1115896
|
||||
sample count = 30
|
||||
format 0:
|
||||
id = 1
|
||||
sampleMimeType = video/dolby-vision
|
||||
|
|
@ -138,8 +138,16 @@ track 0:
|
|||
data = length 23136, hash 8AF1C1AD
|
||||
sample 27:
|
||||
time = 2446666
|
||||
flags = 536870912
|
||||
flags = 0
|
||||
data = length 26792, hash 3157758F
|
||||
sample 28:
|
||||
time = 2613333
|
||||
flags = 0
|
||||
data = length 62711, hash EF9AC8F5
|
||||
sample 29:
|
||||
time = 2546666
|
||||
flags = 536870912
|
||||
data = length 33333, hash 567D33D6
|
||||
track 1:
|
||||
total output bytes = 1239
|
||||
sample count = 3
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ seekMap:
|
|||
getPosition(2548333) = [[timeUs=1680000, position=34939]]
|
||||
numberOfTracks = 2
|
||||
track 0:
|
||||
total output bytes = 3112471
|
||||
sample count = 83
|
||||
total output bytes = 3208515
|
||||
sample count = 85
|
||||
format 0:
|
||||
id = 1
|
||||
sampleMimeType = video/dolby-vision
|
||||
|
|
@ -358,8 +358,16 @@ track 0:
|
|||
data = length 23136, hash 8AF1C1AD
|
||||
sample 82:
|
||||
time = 2446666
|
||||
flags = 536870912
|
||||
flags = 0
|
||||
data = length 26792, hash 3157758F
|
||||
sample 83:
|
||||
time = 2613333
|
||||
flags = 0
|
||||
data = length 62711, hash EF9AC8F5
|
||||
sample 84:
|
||||
time = 2546666
|
||||
flags = 536870912
|
||||
data = length 33333, hash 567D33D6
|
||||
track 1:
|
||||
total output bytes = 45765
|
||||
sample count = 112
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ seekMap:
|
|||
getPosition(2548333) = [[timeUs=1680000, position=34939]]
|
||||
numberOfTracks = 2
|
||||
track 0:
|
||||
total output bytes = 3112471
|
||||
sample count = 83
|
||||
total output bytes = 3208515
|
||||
sample count = 85
|
||||
format 0:
|
||||
id = 1
|
||||
sampleMimeType = video/dolby-vision
|
||||
|
|
@ -358,8 +358,16 @@ track 0:
|
|||
data = length 23136, hash 8AF1C1AD
|
||||
sample 82:
|
||||
time = 2446666
|
||||
flags = 536870912
|
||||
flags = 0
|
||||
data = length 26792, hash 3157758F
|
||||
sample 83:
|
||||
time = 2613333
|
||||
flags = 0
|
||||
data = length 62711, hash EF9AC8F5
|
||||
sample 84:
|
||||
time = 2546666
|
||||
flags = 536870912
|
||||
data = length 33333, hash 567D33D6
|
||||
track 1:
|
||||
total output bytes = 45765
|
||||
sample count = 112
|
||||
|
|
|
|||
|
|
@ -793,7 +793,7 @@ MediaCodecAdapter (exotest.audio.aac):
|
|||
rendered = false
|
||||
MediaCodecAdapter (exotest.video.hevc):
|
||||
inputBuffers:
|
||||
count = 84
|
||||
count = 86
|
||||
input buffer #0:
|
||||
timeUs = 999999545000
|
||||
contents = length 78829, hash 9265686F
|
||||
|
|
@ -1044,11 +1044,17 @@ MediaCodecAdapter (exotest.video.hevc):
|
|||
timeUs = 1000002446666
|
||||
contents = length 26792, hash 3157758F
|
||||
input buffer #83:
|
||||
timeUs = 1000002613333
|
||||
contents = length 62711, hash EF9AC8F5
|
||||
input buffer #84:
|
||||
timeUs = 1000002546666
|
||||
contents = length 33333, hash 567D33D6
|
||||
input buffer #85:
|
||||
timeUs = 0
|
||||
flags = 4
|
||||
contents = length 0, hash 1
|
||||
outputBuffers:
|
||||
count = 83
|
||||
count = 85
|
||||
output buffer #0:
|
||||
timeUs = 999999545000
|
||||
size = 78829
|
||||
|
|
@ -1381,6 +1387,14 @@ MediaCodecAdapter (exotest.video.hevc):
|
|||
timeUs = 1000002446666
|
||||
size = 26792
|
||||
rendered = true
|
||||
output buffer #83:
|
||||
timeUs = 1000002613333
|
||||
size = 62711
|
||||
rendered = true
|
||||
output buffer #84:
|
||||
timeUs = 1000002546666
|
||||
size = 33333
|
||||
rendered = true
|
||||
AudioSink:
|
||||
buffer count = 112
|
||||
config:
|
||||
|
|
|
|||
Loading…
Reference in a new issue