mirror of
https://github.com/samsonjs/media.git
synced 2026-03-25 09:25:53 +00:00
To avoid rounding errors, set the `Rounding mode` of the `usFromVu` and `vuFromUs` results to `HALF_UP`. This `Rounding mode` rounds numbers towards the "nearest neighbor" unless both neighbors are equidistant, in which case it rounds up. PiperOrigin-RevId: 679003943
1833 lines · 67 KiB · Java
/*
|
||
* Copyright 2022 The Android Open Source Project
|
||
*
|
||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
* you may not use this file except in compliance with the License.
|
||
* You may obtain a copy of the License at
|
||
*
|
||
* http://www.apache.org/licenses/LICENSE-2.0
|
||
*
|
||
* Unless required by applicable law or agreed to in writing, software
|
||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
* See the License for the specific language governing permissions and
|
||
* limitations under the License.
|
||
*/
|
||
package androidx.media3.muxer;
|
||
|
||
import static androidx.media3.common.util.Assertions.checkArgument;
|
||
import static androidx.media3.common.util.Assertions.checkNotNull;
|
||
import static androidx.media3.common.util.Assertions.checkState;
|
||
import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX;
|
||
import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_TRANSFER_TO_MP4_TRANSFER;
|
||
import static androidx.media3.muxer.MuxerUtil.UNSIGNED_INT_MAX_VALUE;
|
||
import static java.lang.Math.max;
|
||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||
|
||
import android.media.MediaCodec;
|
||
import android.media.MediaCodec.BufferInfo;
|
||
import android.media.MediaCodecInfo;
|
||
import android.util.Pair;
|
||
import androidx.annotation.Nullable;
|
||
import androidx.media3.common.C;
|
||
import androidx.media3.common.ColorInfo;
|
||
import androidx.media3.common.Format;
|
||
import androidx.media3.common.MimeTypes;
|
||
import androidx.media3.common.util.CodecSpecificDataUtil;
|
||
import androidx.media3.common.util.Util;
|
||
import androidx.media3.container.MdtaMetadataEntry;
|
||
import androidx.media3.container.Mp4LocationData;
|
||
import androidx.media3.container.NalUnitUtil;
|
||
import androidx.media3.muxer.FragmentedMp4Writer.SampleMetadata;
|
||
import com.google.common.collect.ImmutableList;
|
||
import com.google.common.collect.Iterables;
|
||
import com.google.common.collect.Lists;
|
||
import com.google.common.primitives.Bytes;
|
||
import com.google.common.primitives.Ints;
|
||
import java.math.RoundingMode;
|
||
import java.nio.ByteBuffer;
|
||
import java.util.ArrayDeque;
|
||
import java.util.ArrayList;
|
||
import java.util.Arrays;
|
||
import java.util.Collections;
|
||
import java.util.List;
|
||
import java.util.Locale;
|
||
import org.checkerframework.checker.nullness.qual.PolyNull;
|
||
|
||
/** Writes out various types of boxes as per MP4 (ISO/IEC 14496-12) standards. */
|
||
/* package */ final class Boxes {
|
||
/** Total number of bytes in an integer. */
private static final int BYTES_PER_INTEGER = 4;

/** Box size (4 bytes) + Box name (4 bytes) */
public static final int BOX_HEADER_SIZE = 8;

/**
 * Box size = 1 to indicate 64-bit box size (4 bytes) + Box name (4 bytes) + actual box size (8
 * bytes)
 */
public static final int LARGE_SIZE_BOX_HEADER_SIZE = 16;

/** The size (in bytes) of the mfhd box content. */
public static final int MFHD_BOX_CONTENT_SIZE = 2 * BYTES_PER_INTEGER;

/** The size (in bytes) of the tfhd box content. */
public static final int TFHD_BOX_CONTENT_SIZE = 4 * BYTES_PER_INTEGER;

/**
 * The maximum size (in bytes) of boxes that have fixed sizes.
 *
 * <p>Box-writing methods allocate this much and rely on the actual content being smaller.
 */
private static final int MAX_FIXED_LEAF_BOX_SIZE = 200;

/**
 * The per-video timebase, used for durations in MVHD and TKHD even if the per-track timebase is
 * different (e.g. typically the sample rate for audio).
 */
private static final long MVHD_TIMEBASE = 10_000L;

/** unsigned int(2) sample_depends_on = 2 (bit index 25 and 24) */
private static final int TRUN_BOX_SYNC_SAMPLE_FLAGS = 0b00000010_00000000_00000000_00000000;

/**
 * unsigned int(2) sample_depends_on = 1 (bit index 25 and 24), bit(1) sample_is_non_sync_sample =
 * 1 (bit index 16)
 */
private static final int TRUN_BOX_NON_SYNC_SAMPLE_FLAGS = 0b00000001_00000001_00000000_00000000;

// Not instantiable: this class only provides static box-writing helpers.
private Boxes() {}

/** The 16-byte UUID that identifies an XMP metadata payload inside a "uuid" box. */
public static final ImmutableList<Byte> XMP_UUID =
    ImmutableList.of(
        (byte) 0xBE,
        (byte) 0x7A,
        (byte) 0xCF,
        (byte) 0xCB,
        (byte) 0x97,
        (byte) 0xA9,
        (byte) 0x42,
        (byte) 0xE8,
        (byte) 0x9C,
        (byte) 0x71,
        (byte) 0x99,
        (byte) 0x94,
        (byte) 0x91,
        (byte) 0xE3,
        (byte) 0xAF,
        (byte) 0xAC);
|
||
|
||
/**
 * Returns the moov box.
 *
 * <p>The moov box contains the movie header (mvhd), optional user data (udta) and metadata
 * (meta) boxes, plus one trak box per track that has written samples. For fragmented MP4 output
 * an mvex box with one trex box per track is appended. If XMP data is present, a trailing
 * "uuid" box is concatenated after the moov box.
 *
 * @param tracks The tracks to write trak boxes for.
 * @param metadataCollector Source of timestamps, orientation, location, metadata entries and XMP.
 * @param minInputPtsUs The minimum presentation timestamp across all tracks, in microseconds.
 * @param isFragmentedMp4 Whether the output is a fragmented MP4.
 * @param lastSampleDurationBehavior The behaviour for each track's last sample duration.
 */
@SuppressWarnings("InlinedApi")
public static ByteBuffer moov(
    List<Track> tracks,
    MetadataCollector metadataCollector,
    long minInputPtsUs,
    boolean isFragmentedMp4,
    @Mp4Muxer.LastSampleDurationBehavior int lastSampleDurationBehavior) {
  // The timestamp will always fit into a 32-bit integer. This is already validated in the
  // Mp4Muxer.setTimestampData() API. The value after type casting might be negative, but it is
  // still valid because it is meant to be read as an unsigned integer.
  int creationTimestampSeconds = (int) metadataCollector.timestampData.creationTimestampSeconds;
  int modificationTimestampSeconds =
      (int) metadataCollector.timestampData.modificationTimestampSeconds;
  List<ByteBuffer> trakBoxes = new ArrayList<>();
  List<ByteBuffer> trexBoxes = new ArrayList<>();

  int nextTrackId = 1;
  long videoDurationUs = 0L;
  for (int i = 0; i < tracks.size(); i++) {
    Track track = tracks.get(i);
    // Tracks without samples are skipped for non-fragmented output; for fragmented output the
    // samples arrive later in moof fragments, so an empty track is still written.
    if (!isFragmentedMp4 && track.writtenSamples.isEmpty()) {
      continue;
    }
    Format format = track.format;
    String languageCode = bcp47LanguageTagToIso3(format.language);

    // Generate the sample durations to calculate the total duration for tkhd box.
    List<Integer> sampleDurationsVu =
        convertPresentationTimestampsToDurationsVu(
            track.writtenSamples,
            minInputPtsUs,
            track.videoUnitTimebase(),
            lastSampleDurationBehavior,
            track.endOfStreamTimestampUs);

    long trackDurationInTrackUnitsVu = 0;
    for (int j = 0; j < sampleDurationsVu.size(); j++) {
      trackDurationInTrackUnitsVu += sampleDurationsVu.get(j);
    }

    long trackDurationUs = usFromVu(trackDurationInTrackUnitsVu, track.videoUnitTimebase());

    @C.TrackType int trackType = MimeTypes.getTrackType(format.sampleMimeType);
    ByteBuffer stts = stts(sampleDurationsVu);
    // ctts (composition offsets) is only meaningful for video (B-frames); empty otherwise.
    ByteBuffer ctts =
        MimeTypes.isVideo(format.sampleMimeType)
            ? ctts(track.writtenSamples, sampleDurationsVu, track.videoUnitTimebase())
            : ByteBuffer.allocate(0);
    ByteBuffer stsz = stsz(track.writtenSamples);
    ByteBuffer stsc = stsc(track.writtenChunkSampleCounts);
    // Fragmented output writes placeholder 32-bit offsets (stco); non-fragmented output uses
    // 64-bit offsets (co64) so large files are supported.
    ByteBuffer chunkOffsetBox =
        isFragmentedMp4 ? stco(track.writtenChunkOffsets) : co64(track.writtenChunkOffsets);

    String handlerType;
    String handlerName;
    ByteBuffer mhdBox;
    ByteBuffer sampleEntryBox;
    ByteBuffer stsdBox;
    ByteBuffer stblBox;

    switch (trackType) {
      case C.TRACK_TYPE_VIDEO:
        handlerType = "vide";
        handlerName = "VideoHandle";
        mhdBox = vmhd();
        sampleEntryBox = videoSampleEntry(format);
        stsdBox = stsd(sampleEntryBox);
        // Video additionally gets an stss (sync sample) box for seeking.
        stblBox =
            stbl(stsdBox, stts, ctts, stsz, stsc, chunkOffsetBox, stss(track.writtenSamples));
        break;
      case C.TRACK_TYPE_AUDIO:
        handlerType = "soun";
        handlerName = "SoundHandle";
        mhdBox = smhd();
        sampleEntryBox = audioSampleEntry(format);
        stsdBox = stsd(sampleEntryBox);
        stblBox = stbl(stsdBox, stts, stsz, stsc, chunkOffsetBox);
        break;
      case C.TRACK_TYPE_METADATA:
      case C.TRACK_TYPE_UNKNOWN:
        handlerType = "meta";
        handlerName = "MetaHandle";
        mhdBox = nmhd();
        sampleEntryBox = textMetaDataSampleEntry(format);
        stsdBox = stsd(sampleEntryBox);
        stblBox = stbl(stsdBox, stts, stsz, stsc, chunkOffsetBox);
        break;
      default:
        throw new IllegalArgumentException("Unsupported track type");
    }

    ByteBuffer trakBox =
        trak(
            tkhd(
                nextTrackId,
                trackDurationUs,
                creationTimestampSeconds,
                modificationTimestampSeconds,
                metadataCollector.orientationData.orientation,
                format),
            mdia(
                mdhd(
                    trackDurationInTrackUnitsVu,
                    track.videoUnitTimebase(),
                    creationTimestampSeconds,
                    modificationTimestampSeconds,
                    languageCode),
                hdlr(handlerType, handlerName),
                minf(mhdBox, dinf(dref(localUrl())), stblBox)));

    trakBoxes.add(trakBox);
    // The movie duration is the duration of the longest track.
    videoDurationUs = max(videoDurationUs, trackDurationUs);
    trexBoxes.add(trex(nextTrackId));
    nextTrackId++;
  }

  ByteBuffer mvhdBox =
      mvhd(nextTrackId, creationTimestampSeconds, modificationTimestampSeconds, videoDurationUs);
  ByteBuffer udtaBox = udta(metadataCollector.locationData);
  ByteBuffer metaBox =
      metadataCollector.metadataEntries.isEmpty()
          ? ByteBuffer.allocate(0)
          : meta(
              hdlr(/* handlerType= */ "mdta", /* handlerName= */ ""),
              keys(Lists.newArrayList(metadataCollector.metadataEntries)),
              ilst(Lists.newArrayList(metadataCollector.metadataEntries)));

  List<ByteBuffer> subBoxes = new ArrayList<>();
  subBoxes.add(mvhdBox);
  subBoxes.add(udtaBox);
  subBoxes.add(metaBox);
  subBoxes.addAll(trakBoxes);
  if (isFragmentedMp4) {
    subBoxes.add(mvex(trexBoxes));
  }

  ByteBuffer moovBox = BoxUtils.wrapBoxesIntoBox("moov", subBoxes);

  if (metadataCollector.xmpData != null) {
    return BoxUtils.concatenateBuffers(
        moovBox, uuid(XMP_UUID, ByteBuffer.wrap(metadataCollector.xmpData.data)));
  } else {
    // No need for another copy if there is no XMP to be appended.
    return moovBox;
  }
}
|
||
|
||
/**
|
||
* Returns the tkhd box.
|
||
*
|
||
* <p>This is a per-track header box.
|
||
*/
|
||
public static ByteBuffer tkhd(
|
||
int trackId,
|
||
long trackDurationUs,
|
||
int creationTimestampSeconds,
|
||
int modificationTimestampSeconds,
|
||
int orientation,
|
||
Format format) {
|
||
ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE);
|
||
contents.putInt(0x00000007); // version and flags: allow presentation, etc.
|
||
|
||
contents.putInt(creationTimestampSeconds); // creation_time: unsigned int(32)
|
||
contents.putInt(modificationTimestampSeconds); // modification_time: unsigned int(32)
|
||
|
||
contents.putInt(trackId);
|
||
contents.putInt(0); // reserved
|
||
|
||
// Using the time base of the entire file, not that of the track; otherwise,
|
||
// Quicktime will stretch the audio accordingly, see b/158120042.
|
||
int trackDurationVu = (int) vuFromUs(trackDurationUs, MVHD_TIMEBASE);
|
||
contents.putInt(trackDurationVu);
|
||
|
||
contents.putInt(0); // reserved
|
||
contents.putInt(0); // reserved
|
||
|
||
contents.putInt(0); // layer = 0 and alternate_group = 0
|
||
contents.putShort(MimeTypes.isAudio(format.sampleMimeType) ? (short) 0x0100 : 0); // volume
|
||
contents.putShort((short) 0); // reserved
|
||
|
||
contents.put(rotationMatrixFromOrientation(orientation));
|
||
|
||
int width = format.width != Format.NO_VALUE ? format.width : 0;
|
||
int height = format.height != Format.NO_VALUE ? format.height : 0;
|
||
|
||
contents.putInt(width << 16);
|
||
contents.putInt(height << 16);
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("tkhd", contents);
|
||
}
|
||
|
||
/**
|
||
* Returns the mvhd box.
|
||
*
|
||
* <p>This is the movie header for the entire MP4 file.
|
||
*/
|
||
public static ByteBuffer mvhd(
|
||
int nextEmptyTrackId,
|
||
int creationTimestampSeconds,
|
||
int modificationTimestampSeconds,
|
||
long videoDurationUs) {
|
||
ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE);
|
||
contents.putInt(0); // version and flags
|
||
|
||
contents.putInt(creationTimestampSeconds); // creation_time: unsigned int(32)
|
||
contents.putInt(modificationTimestampSeconds); // modification_time: unsigned int(32)
|
||
contents.putInt((int) MVHD_TIMEBASE); // The per-track timescales might be different.
|
||
contents.putInt(
|
||
(int) vuFromUs(videoDurationUs, MVHD_TIMEBASE)); // Duration of the entire video.
|
||
contents.putInt(0x00010000); // rate = 1.0
|
||
contents.putShort((short) 0x0100); // volume = full volume
|
||
contents.putShort((short) 0); // reserved
|
||
|
||
contents.putInt(0); // reserved
|
||
contents.putInt(0); // reserved
|
||
|
||
// Default values (unity matrix). It looks like that this needs to be an identity matrix, since
|
||
// some players will apply both this and the per-track transformation, while some only go with
|
||
// the per-track one.
|
||
int[] matrix = {0x00010000, 0, 0, 0, 0x00010000, 0, 0, 0, 0x40000000};
|
||
for (int i = 0; i < matrix.length; i++) {
|
||
contents.putInt(matrix[i]);
|
||
}
|
||
|
||
for (int i = 0; i < 6; i++) {
|
||
contents.putInt(0); // pre_defined
|
||
}
|
||
|
||
// Next empty track id.
|
||
contents.putInt(nextEmptyTrackId);
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("mvhd", contents);
|
||
}
|
||
|
||
/**
|
||
* Returns the mdhd box.
|
||
*
|
||
* <p>This is a per-track (media) header.
|
||
*/
|
||
public static ByteBuffer mdhd(
|
||
long trackDurationVu,
|
||
int videoUnitTimebase,
|
||
int creationTimestampSeconds,
|
||
int modificationTimestampSeconds,
|
||
@Nullable String languageCode) {
|
||
ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE);
|
||
contents.putInt(0x0); // version and flags
|
||
|
||
contents.putInt(creationTimestampSeconds); // creation_time: unsigned int(32)
|
||
contents.putInt(modificationTimestampSeconds); // modification_time: unsigned int(32)
|
||
|
||
contents.putInt(videoUnitTimebase);
|
||
|
||
contents.putInt((int) trackDurationVu);
|
||
|
||
contents.putShort(languageCodeFromString(languageCode));
|
||
contents.putShort((short) 0);
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("mdhd", contents);
|
||
}
|
||
|
||
/**
|
||
* Returns the vmhd box.
|
||
*
|
||
* <p>This is a header for video tracks.
|
||
*/
|
||
public static ByteBuffer vmhd() {
|
||
ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE);
|
||
contents.putInt(0x0); // version and flags
|
||
|
||
contents.putShort((short) 0); // graphicsmode
|
||
// opcolor (red, green, blue)
|
||
contents.putShort((short) 0);
|
||
contents.putShort((short) 0);
|
||
contents.putShort((short) 0);
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("vmhd", contents);
|
||
}
|
||
|
||
/**
|
||
* Returns the smhd box.
|
||
*
|
||
* <p>This is a header for audio tracks.
|
||
*/
|
||
public static ByteBuffer smhd() {
|
||
ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE);
|
||
contents.putInt(0x0); // version and flags
|
||
|
||
contents.putShort((short) 0); // balance
|
||
contents.putShort((short) 0); // reserved
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("smhd", contents);
|
||
}
|
||
|
||
/**
|
||
* Returns the nmhd box.
|
||
*
|
||
* <p>This is a header for metadata tracks.
|
||
*/
|
||
public static ByteBuffer nmhd() {
|
||
ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE);
|
||
contents.putInt(0x0); // version and flags
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("nmhd", contents);
|
||
}
|
||
|
||
/**
|
||
* Returns a text metadata sample entry box as per ISO/IEC 14496-12: 8.5.2.2.
|
||
*
|
||
* <p>This contains the sample entry (to be placed within the sample description box) for the text
|
||
* metadata tracks.
|
||
*/
|
||
public static ByteBuffer textMetaDataSampleEntry(Format format) {
|
||
ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE);
|
||
String mimeType = checkNotNull(format.sampleMimeType);
|
||
byte[] mimeBytes = Util.getUtf8Bytes(mimeType);
|
||
contents.put(mimeBytes); // content_encoding
|
||
contents.put((byte) 0x0);
|
||
contents.put(mimeBytes); // mime_format
|
||
contents.put((byte) 0x0);
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("mett", contents);
|
||
}
|
||
|
||
/** Returns the minf (media info) box. */
|
||
public static ByteBuffer minf(ByteBuffer... subBoxes) {
|
||
return BoxUtils.wrapBoxesIntoBox("minf", Arrays.asList(subBoxes));
|
||
}
|
||
|
||
/** Returns the dref (data references) box. */
|
||
public static ByteBuffer dref(ByteBuffer... dataLocationBoxes) {
|
||
ByteBuffer header = ByteBuffer.allocate(8);
|
||
header.putInt(0);
|
||
header.putInt(dataLocationBoxes.length);
|
||
header.flip();
|
||
|
||
List<ByteBuffer> contents = new ArrayList<>();
|
||
contents.add(header);
|
||
Collections.addAll(contents, dataLocationBoxes);
|
||
|
||
return BoxUtils.wrapBoxesIntoBox("dref", contents);
|
||
}
|
||
|
||
/** Returns the dinf (data information) box. */
|
||
public static ByteBuffer dinf(ByteBuffer dref) {
|
||
return BoxUtils.wrapIntoBox("dinf", dref);
|
||
}
|
||
|
||
/**
|
||
* Returns the url box.
|
||
*
|
||
* <p>This box declares the location of media data (whether it is in this file or in some other
|
||
* remote file).
|
||
*/
|
||
public static ByteBuffer localUrl() {
|
||
ByteBuffer contents = ByteBuffer.allocate(4);
|
||
|
||
// Indicates that the data is in this file instead of in a remote URL. Hence no URL is written.
|
||
contents.putInt(1);
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("url ", contents);
|
||
}
|
||
|
||
/**
|
||
* Returns the hdlr box.
|
||
*
|
||
* <p>This box includes tha handler specification for a track (signals whether this is video,
|
||
* audio or metadata).
|
||
*
|
||
* @param handlerType The handle type, as defined in ISO/IEC 14496-12: 8.4.3.3.
|
||
* @param handlerName The handler name, a human-readable name to identify track type for debugging
|
||
* and inspection purposes.
|
||
* @return {@link ByteBuffer} containing the hdlr box.
|
||
*/
|
||
public static ByteBuffer hdlr(String handlerType, String handlerName) {
|
||
ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE);
|
||
contents.putInt(0x0); // version and flags
|
||
contents.putInt(0); // pre_defined
|
||
contents.put(Util.getUtf8Bytes(handlerType)); // handler_type
|
||
contents.putInt(0); // reserved
|
||
contents.putInt(0); // reserved
|
||
contents.putInt(0); // reserved
|
||
contents.put(Util.getUtf8Bytes(handlerName)); // name
|
||
contents.put((byte) 0); // The null terminator for name
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("hdlr", contents);
|
||
}
|
||
|
||
/**
|
||
* Returns the mdia box.
|
||
*
|
||
* <p>This box describes the media format of a track.
|
||
*/
|
||
public static ByteBuffer mdia(ByteBuffer... subBoxes) {
|
||
return BoxUtils.wrapBoxesIntoBox("mdia", Arrays.asList(subBoxes));
|
||
}
|
||
|
||
/**
|
||
* Returns the trak box.
|
||
*
|
||
* <p>This is a top level track descriptor box; each track has one.
|
||
*/
|
||
public static ByteBuffer trak(ByteBuffer... subBoxes) {
|
||
return BoxUtils.wrapBoxesIntoBox("trak", Arrays.asList(subBoxes));
|
||
}
|
||
|
||
/**
|
||
* Returns the udta box.
|
||
*
|
||
* <p>This box contains user data like location info.
|
||
*/
|
||
public static ByteBuffer udta(@Nullable Mp4LocationData location) {
|
||
if (location == null) {
|
||
return ByteBuffer.allocate(0);
|
||
}
|
||
|
||
String locationString =
|
||
Util.formatInvariant("%+.4f%+.4f/", location.latitude, location.longitude);
|
||
|
||
ByteBuffer xyzBoxContents = ByteBuffer.allocate(locationString.length() + 2 + 2);
|
||
xyzBoxContents.putShort((short) (xyzBoxContents.capacity() - 4));
|
||
xyzBoxContents.putShort((short) 0x15C7); // language code
|
||
|
||
xyzBoxContents.put(Util.getUtf8Bytes(locationString));
|
||
checkState(xyzBoxContents.limit() == xyzBoxContents.capacity());
|
||
xyzBoxContents.flip();
|
||
|
||
return BoxUtils.wrapIntoBox(
|
||
"udta",
|
||
BoxUtils.wrapIntoBox(
|
||
new byte[] {
|
||
(byte) 0xA9, // copyright symbol
|
||
'x',
|
||
'y',
|
||
'z'
|
||
},
|
||
xyzBoxContents));
|
||
}
|
||
|
||
/**
|
||
* Returns the keys box.
|
||
*
|
||
* <p>This box contains a list of metadata keys.
|
||
*/
|
||
public static ByteBuffer keys(List<MdtaMetadataEntry> mdtaMetadataEntries) {
|
||
int totalSizeToStoreKeys = 0;
|
||
for (int i = 0; i < mdtaMetadataEntries.size(); i++) {
|
||
// Add header size to wrap each key into a "mdta" box.
|
||
totalSizeToStoreKeys += mdtaMetadataEntries.get(i).key.length() + BOX_HEADER_SIZE;
|
||
}
|
||
ByteBuffer contents = ByteBuffer.allocate(2 * BYTES_PER_INTEGER + totalSizeToStoreKeys);
|
||
contents.putInt(0x0); // version and flags
|
||
contents.putInt(mdtaMetadataEntries.size()); // Entry count
|
||
|
||
for (int i = 0; i < mdtaMetadataEntries.size(); i++) {
|
||
ByteBuffer keyNameBuffer = ByteBuffer.wrap(Util.getUtf8Bytes(mdtaMetadataEntries.get(i).key));
|
||
contents.put(BoxUtils.wrapIntoBox("mdta", keyNameBuffer));
|
||
}
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("keys", contents);
|
||
}
|
||
|
||
/**
|
||
* Returns the ilst box.
|
||
*
|
||
* <p>This box contains a list of metadata values.
|
||
*/
|
||
public static ByteBuffer ilst(List<MdtaMetadataEntry> mdtaMetadataEntries) {
|
||
int totalSizeToStoreValues = 0;
|
||
for (int i = 0; i < mdtaMetadataEntries.size(); i++) {
|
||
// Add additional 16 bytes for writing metadata associated to each value.
|
||
// Add header size to wrap each value into a "data" box.
|
||
totalSizeToStoreValues +=
|
||
mdtaMetadataEntries.get(i).value.length + 4 * BYTES_PER_INTEGER + BOX_HEADER_SIZE;
|
||
}
|
||
|
||
ByteBuffer contents = ByteBuffer.allocate(totalSizeToStoreValues);
|
||
|
||
for (int i = 0; i < mdtaMetadataEntries.size(); i++) {
|
||
int keyId = i + 1;
|
||
MdtaMetadataEntry currentMdtaMetadataEntry = mdtaMetadataEntries.get(i);
|
||
|
||
ByteBuffer valueContents =
|
||
ByteBuffer.allocate(2 * BYTES_PER_INTEGER + currentMdtaMetadataEntry.value.length);
|
||
valueContents.putInt(currentMdtaMetadataEntry.typeIndicator);
|
||
valueContents.putInt(currentMdtaMetadataEntry.localeIndicator);
|
||
valueContents.put(currentMdtaMetadataEntry.value);
|
||
|
||
valueContents.flip();
|
||
ByteBuffer valueBox = BoxUtils.wrapIntoBox("data", valueContents);
|
||
contents.putInt(valueBox.remaining() + BOX_HEADER_SIZE);
|
||
contents.putInt(keyId);
|
||
contents.put(valueBox);
|
||
}
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("ilst", contents);
|
||
}
|
||
|
||
/** Returns the meta (metadata) box. */
|
||
public static ByteBuffer meta(ByteBuffer... subBoxes) {
|
||
return BoxUtils.wrapBoxesIntoBox("meta", Arrays.asList(subBoxes));
|
||
}
|
||
|
||
/**
|
||
* Returns the uuid box.
|
||
*
|
||
* <p>This box is used for XMP and other metadata.
|
||
*/
|
||
public static ByteBuffer uuid(List<Byte> uuid, ByteBuffer contents) {
|
||
checkArgument(contents.remaining() > 0);
|
||
return BoxUtils.wrapBoxesIntoBox(
|
||
"uuid", ImmutableList.of(ByteBuffer.wrap(Bytes.toArray(uuid)), contents));
|
||
}
|
||
|
||
/** Returns an audio sample entry box based on the MIME type. */
|
||
public static ByteBuffer audioSampleEntry(Format format) {
|
||
String fourcc = codecSpecificFourcc(format);
|
||
ByteBuffer codecSpecificBox = codecSpecificBox(format);
|
||
|
||
ByteBuffer contents =
|
||
ByteBuffer.allocate(codecSpecificBox.remaining() + MAX_FIXED_LEAF_BOX_SIZE);
|
||
|
||
contents.putInt(0x0); // reserved
|
||
contents.putShort((short) 0x0); // reserved
|
||
contents.putShort((short) 0x1); // data ref index
|
||
contents.putInt(0x0); // reserved
|
||
contents.putInt(0x0); // reserved
|
||
|
||
int channelCount = format.channelCount;
|
||
contents.putShort((short) channelCount);
|
||
contents.putShort((short) 16); // sample size
|
||
contents.putShort((short) 0x0); // predefined
|
||
contents.putShort((short) 0x0); // reserved
|
||
|
||
int sampleRate = format.sampleRate;
|
||
contents.putInt(sampleRate << 16);
|
||
|
||
contents.put(codecSpecificBox);
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox(fourcc, contents);
|
||
}
|
||
|
||
/** Returns a codec specific box. */
|
||
public static ByteBuffer codecSpecificBox(Format format) {
|
||
String mimeType = checkNotNull(format.sampleMimeType);
|
||
switch (mimeType) {
|
||
case MimeTypes.AUDIO_AAC:
|
||
case MimeTypes.AUDIO_VORBIS:
|
||
return esdsBox(format);
|
||
case MimeTypes.AUDIO_AMR_NB:
|
||
return damrBox(/* mode= */ (short) 0x81FF); // mode set: all enabled for AMR-NB
|
||
case MimeTypes.AUDIO_AMR_WB:
|
||
return damrBox(/* mode= */ (short) 0x83FF); // mode set: all enabled for AMR-WB
|
||
case MimeTypes.AUDIO_OPUS:
|
||
return dOpsBox(format);
|
||
case MimeTypes.VIDEO_H263:
|
||
return d263Box(format);
|
||
case MimeTypes.VIDEO_H264:
|
||
return avcCBox(format);
|
||
case MimeTypes.VIDEO_H265:
|
||
return hvcCBox(format);
|
||
case MimeTypes.VIDEO_AV1:
|
||
return av1CBox(format);
|
||
case MimeTypes.VIDEO_MP4V:
|
||
return esdsBox(format);
|
||
case MimeTypes.VIDEO_VP9:
|
||
return vpcCBox(format);
|
||
default:
|
||
throw new IllegalArgumentException("Unsupported format: " + mimeType);
|
||
}
|
||
}
|
||
|
||
/**
 * Returns a {@code VisualSampleEntry} box based upon the MIME type.
 *
 * <p>The {@code VisualSampleEntry} schema is defined in ISO/IEC 14496-12: 8.5.2.2.
 *
 * @param format The video {@link Format}; its width, height, color info and codec specific data
 *     are written into the sample entry.
 */
public static ByteBuffer videoSampleEntry(Format format) {
  ByteBuffer codecSpecificBox = codecSpecificBox(format);
  String fourcc = codecSpecificFourcc(format);

  ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE + codecSpecificBox.limit());

  // reserved = 0 (6 bytes)
  contents.putInt(0);
  contents.putShort((short) 0);

  contents.putShort((short) 1); // data_reference_index

  contents.putShort((short) 0); // pre_defined
  contents.putShort((short) 0); // reserved

  // pre_defined (3 x 4 bytes)
  contents.putInt(0);
  contents.putInt(0);
  contents.putInt(0);

  // Width/height are written as 0 when the format does not declare them.
  contents.putShort(format.width != Format.NO_VALUE ? (short) format.width : 0);
  contents.putShort(format.height != Format.NO_VALUE ? (short) format.height : 0);

  contents.putInt(0x00480000); // horizresolution = 72 dpi
  contents.putInt(0x00480000); // vertresolution = 72 dpi

  contents.putInt(0); // reserved

  contents.putShort((short) 1); // frame_count

  // compressorname: 32 zero bytes (empty name).
  contents.putLong(0);
  contents.putLong(0);
  contents.putLong(0);
  contents.putLong(0);

  contents.putShort((short) 0x0018); // depth
  contents.putShort((short) -1); // pre_defined

  contents.put(codecSpecificBox);
  // NOTE(review): the SmDm (mastering display metadata) box is only appended for VP9 — presumably
  // other codecs carry equivalent data inside their codec specific boxes; verify for other HDR
  // codecs.
  if (format.colorInfo != null && fourcc.equals("vp09")) {
    contents.put(smDmBox(format.colorInfo));
  }

  contents.put(paspBox());

  // Put in a "colr" box if any of the three color format parameters has a non-default (0) value.
  // TODO: b/278101856 - Only null check should be enough once we disallow invalid values.
  if (format.colorInfo != null
      && (format.colorInfo.colorSpace != 0
          || format.colorInfo.colorTransfer != 0
          || format.colorInfo.colorRange != 0)) {
    contents.put(colrBox(format.colorInfo));
  }

  contents.flip();
  return BoxUtils.wrapIntoBox(fourcc, contents);
}
|
||
|
||
/**
 * Converts sample presentation times (in microseconds) to sample durations (in timebase units).
 *
 * <p>All the tracks must start from the same time. If all the tracks do not start from the same
 * time, then the caller must pass the minimum presentation timestamp across all tracks to be set
 * for the first sample. As a result, the duration of that first sample may be larger.
 *
 * @param samplesInfo A list of {@linkplain BufferInfo sample info}.
 * @param firstSamplePresentationTimeUs The presentation timestamp to override the first sample's
 *     presentation timestamp, in microseconds. This should be the minimum presentation timestamp
 *     across all tracks if the {@code samplesInfo} contains the first sample of the track.
 *     Otherwise this should be equal to the presentation timestamp of first sample present in the
 *     {@code samplesInfo} list.
 * @param videoUnitTimescale The timescale of the track.
 * @param lastSampleDurationBehavior The behaviour for the last sample duration.
 * @param endOfStreamTimestampUs The timestamp (in microseconds) of the end of stream sample.
 * @return A list of all the sample durations.
 */
public static List<Integer> convertPresentationTimestampsToDurationsVu(
    List<BufferInfo> samplesInfo,
    long firstSamplePresentationTimeUs,
    int videoUnitTimescale,
    @Mp4Muxer.LastSampleDurationBehavior int lastSampleDurationBehavior,
    long endOfStreamTimestampUs) {
  List<Long> presentationTimestampsUs = new ArrayList<>(samplesInfo.size());
  List<Integer> durationsVu = new ArrayList<>(samplesInfo.size());

  if (samplesInfo.isEmpty()) {
    return durationsVu;
  }

  // Detect B-frames: any sample whose composition time is earlier than its predecessor's means
  // the samples are in decode order, not presentation order.
  boolean hasBframe = false;
  long lastSampleCompositionTimeUs = 0L;
  for (int sampleId = 0; sampleId < samplesInfo.size(); sampleId++) {
    long currentSampleCompositionTimeUs = samplesInfo.get(sampleId).presentationTimeUs;
    presentationTimestampsUs.add(currentSampleCompositionTimeUs);
    if (currentSampleCompositionTimeUs < lastSampleCompositionTimeUs) {
      hasBframe = true;
    }
    lastSampleCompositionTimeUs = currentSampleCompositionTimeUs;
  }

  // Durations must be computed over presentation order, so sort if decode order differed.
  if (hasBframe) {
    Collections.sort(presentationTimestampsUs);
  }

  // Each sample's duration is the gap to the next presentation timestamp. The first sample's
  // time is overridden by firstSamplePresentationTimeUs (see javadoc), which may enlarge its
  // duration.
  long currentSampleTimeUs = firstSamplePresentationTimeUs;
  for (int nextSampleId = 1; nextSampleId < presentationTimestampsUs.size(); nextSampleId++) {
    long nextSampleTimeUs = presentationTimestampsUs.get(nextSampleId);
    long currentSampleDurationVu =
        vuFromUs(nextSampleTimeUs - currentSampleTimeUs, videoUnitTimescale);
    checkState(
        currentSampleDurationVu <= Integer.MAX_VALUE, "Only 32-bit sample duration is allowed");
    durationsVu.add((int) currentSampleDurationVu);
    currentSampleTimeUs = nextSampleTimeUs;
  }

  // If an explicit end-of-stream timestamp exists, the last sample's duration stretches up to it;
  // otherwise it stays C.LENGTH_UNSET and getLastSampleDurationVu applies the requested
  // behaviour.
  long lastSampleDurationVuFromEndOfStream = C.LENGTH_UNSET;
  if (endOfStreamTimestampUs != C.TIME_UNSET) {
    lastSampleDurationVuFromEndOfStream =
        vuFromUs(endOfStreamTimestampUs, videoUnitTimescale)
            - vuFromUs(currentSampleTimeUs, videoUnitTimescale);
    checkState(
        lastSampleDurationVuFromEndOfStream <= Integer.MAX_VALUE,
        "Only 32-bit sample duration is allowed");
  }

  durationsVu.add(
      getLastSampleDurationVu(
          durationsVu, lastSampleDurationBehavior, (int) lastSampleDurationVuFromEndOfStream));

  return durationsVu;
}
|
||
|
||
/** Generates the stts (decoding time to sample) box. */
public static ByteBuffer stts(List<Integer> durationsVu) {
  // Worst case: every sample has a distinct duration, i.e. one 8-byte entry per sample.
  ByteBuffer contents = ByteBuffer.allocate(durationsVu.size() * 8 + MAX_FIXED_LEAF_BOX_SIZE);

  contents.putInt(0x0); // version and flags

  // Total entry count is known only after processing all sample durations, so put in a
  // placeholder for total entry count and store its index.
  int totalEntryCountIndex = contents.position();
  contents.putInt(0x0); // entry_count

  int totalEntryCount = 0;
  long lastDurationVu = -1L; // Sentinel: no duration seen yet.
  int lastSampleCountIndex = -1; // Buffer index of the sample_count field currently being grown.

  // Run-length encode consecutive equal durations into (sample_count, sample_delta) entries.
  for (int i = 0; i < durationsVu.size(); i++) {
    int durationVu = durationsVu.get(i);
    if (lastDurationVu != durationVu) {
      lastDurationVu = durationVu;
      lastSampleCountIndex = contents.position();

      // sample_count; this will be updated instead of adding a new entry if the next sample has
      // the same duration.
      contents.putInt(1);
      contents.putInt(durationVu); // sample_delta
      totalEntryCount++;
    } else {
      // Same duration as the previous sample: bump the in-place sample_count.
      contents.putInt(lastSampleCountIndex, contents.getInt(lastSampleCountIndex) + 1);
    }
  }

  // Backpatch the real entry count over the placeholder.
  contents.putInt(totalEntryCountIndex, totalEntryCount);

  contents.flip();
  return BoxUtils.wrapIntoBox("stts", contents);
}
|
||
|
||
/** Returns the ctts (composition time to sample) box. */
public static ByteBuffer ctts(
    List<BufferInfo> samplesInfo, List<Integer> durationVu, int videoUnitTimescale) {
  // Generate the sample composition offsets list to create ctts box.
  List<Integer> compositionOffsets =
      calculateSampleCompositionTimeOffsets(samplesInfo, durationVu, videoUnitTimescale);

  // An empty offset list means there are no B-frames, so the ctts box is omitted entirely.
  if (compositionOffsets.isEmpty()) {
    return ByteBuffer.allocate(0);
  }

  // Worst case: one (sample_count, sample_offset) pair per sample plus the fixed header.
  ByteBuffer contents =
      ByteBuffer.allocate(
          2 * BYTES_PER_INTEGER + 2 * compositionOffsets.size() * BYTES_PER_INTEGER);

  // NOTE(review): putInt(1) encodes version 0 with flags 0x000001; a version-1 box (signed
  // offsets) would be 0x01000000 — confirm the intended version/flags encoding.
  contents.putInt(1); // version and flags.

  // Total entry count is known only after processing all the composition offsets, so put in
  // a placeholder for total entry count and store its index.
  int totalEntryCountIndex = contents.position();
  contents.putInt(0x0); // entry_count

  int totalEntryCount = 0;
  // NOTE(review): -1 is used as a "no previous offset" sentinel; a genuine first offset of -1
  // would collide with it — unlikely, but worth confirming upstream invariants.
  int lastCompositionOffset = -1;
  int lastSampleCountIndex = -1;

  // Run-length encode consecutive equal composition offsets.
  for (int i = 0; i < compositionOffsets.size(); i++) {
    int currentCompositionOffset = compositionOffsets.get(i);
    if (lastCompositionOffset != currentCompositionOffset) {
      lastCompositionOffset = currentCompositionOffset;
      lastSampleCountIndex = contents.position();

      // sample_count; this will be updated instead of adding a new entry if the next sample has
      // the same composition offset.
      contents.putInt(1); // sample_count
      contents.putInt(currentCompositionOffset); // sample_offset
      totalEntryCount++;
    } else {
      contents.putInt(lastSampleCountIndex, contents.getInt(lastSampleCountIndex) + 1);
    }
  }

  // Backpatch the real entry count over the placeholder.
  contents.putInt(totalEntryCountIndex, totalEntryCount);

  contents.flip();
  return BoxUtils.wrapIntoBox("ctts", contents);
}
|
||
|
||
/**
|
||
* Calculates sample composition time offsets (in timebase units).
|
||
*
|
||
* <p>The sample composition time offset gives offset between composition time (CT) and decoding
|
||
* time (DT), such that {@code CT(n) = DT(n) + sample_offset(n)}.
|
||
*
|
||
* @param samplesInfo A list of {@linkplain BufferInfo sample info}.
|
||
* @param durationVu A list of all the sample durations.
|
||
* @param videoUnitTimescale The timescale of the track.
|
||
* @return A list of all the sample composition time offsets.
|
||
*/
|
||
public static List<Integer> calculateSampleCompositionTimeOffsets(
|
||
List<BufferInfo> samplesInfo, List<Integer> durationVu, int videoUnitTimescale) {
|
||
List<Integer> compositionOffsets = new ArrayList<>(samplesInfo.size());
|
||
if (samplesInfo.isEmpty()) {
|
||
return compositionOffsets;
|
||
}
|
||
|
||
long currentSampleDecodeTime = 0L;
|
||
long firstSamplePresentationTimeUs = samplesInfo.get(0).presentationTimeUs;
|
||
boolean hasBFrame = false;
|
||
long lastSampleCompositionTimeUs = 0L;
|
||
|
||
for (int sampleId = 0; sampleId < samplesInfo.size(); sampleId++) {
|
||
long currentSampleCompositionTimeUs =
|
||
samplesInfo.get(sampleId).presentationTimeUs - firstSamplePresentationTimeUs;
|
||
long currentCompositionOffsetVu =
|
||
vuFromUs(currentSampleCompositionTimeUs, videoUnitTimescale) - currentSampleDecodeTime;
|
||
checkState(
|
||
currentCompositionOffsetVu <= Integer.MAX_VALUE,
|
||
"Only 32-bit composition offset is allowed");
|
||
currentSampleDecodeTime += durationVu.get(sampleId); // DT(n+1) = DT(n) + STTS(n)
|
||
compositionOffsets.add((int) currentCompositionOffsetVu);
|
||
|
||
if (currentSampleCompositionTimeUs < lastSampleCompositionTimeUs) {
|
||
hasBFrame = true;
|
||
}
|
||
lastSampleCompositionTimeUs = currentSampleCompositionTimeUs;
|
||
}
|
||
|
||
if (!hasBFrame) {
|
||
compositionOffsets.clear();
|
||
}
|
||
return compositionOffsets;
|
||
}
|
||
|
||
/** Returns the stsz (sample size) box. */
|
||
public static ByteBuffer stsz(List<MediaCodec.BufferInfo> writtenSamples) {
|
||
ByteBuffer contents = ByteBuffer.allocate(writtenSamples.size() * 4 + MAX_FIXED_LEAF_BOX_SIZE);
|
||
|
||
contents.putInt(0x0); // version and flags
|
||
|
||
// TODO: b/270583563 - Consider optimizing for identically-sized samples.
|
||
// sample_size: specifying the default sample size. Set to zero to indicate that the samples
|
||
// have different sizes and they are stored in the sample size table.
|
||
contents.putInt(0);
|
||
|
||
contents.putInt(writtenSamples.size()); // sample_count
|
||
|
||
for (int i = 0; i < writtenSamples.size(); i++) {
|
||
contents.putInt(writtenSamples.get(i).size);
|
||
}
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("stsz", contents);
|
||
}
|
||
|
||
/** Returns the stsc (sample to chunk) box. */
|
||
public static ByteBuffer stsc(List<Integer> writtenChunkSampleCounts) {
|
||
ByteBuffer contents =
|
||
ByteBuffer.allocate(writtenChunkSampleCounts.size() * 12 + MAX_FIXED_LEAF_BOX_SIZE);
|
||
|
||
contents.putInt(0x0); // version and flags
|
||
contents.putInt(writtenChunkSampleCounts.size()); // entry_count
|
||
|
||
int currentChunk = 1;
|
||
|
||
// TODO: b/270583563 - Consider optimizing for consecutive chunks having same number of samples.
|
||
for (int i = 0; i < writtenChunkSampleCounts.size(); i++) {
|
||
int samplesInChunk = writtenChunkSampleCounts.get(i);
|
||
contents.putInt(currentChunk); // first_chunk
|
||
contents.putInt(samplesInChunk); // samples_per_chunk
|
||
// sample_description_index: there is only one sample description in each track.
|
||
contents.putInt(1);
|
||
|
||
currentChunk += 1;
|
||
}
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("stsc", contents);
|
||
}
|
||
|
||
/** Returns the stco (32-bit chunk offset) box. */
|
||
public static ByteBuffer stco(List<Long> writtenChunkOffsets) {
|
||
ByteBuffer contents =
|
||
ByteBuffer.allocate(2 * BYTES_PER_INTEGER + writtenChunkOffsets.size() * BYTES_PER_INTEGER);
|
||
|
||
contents.putInt(0x0); // version and flags
|
||
contents.putInt(writtenChunkOffsets.size()); // entry_count: unsigned int(32)
|
||
|
||
for (int i = 0; i < writtenChunkOffsets.size(); i++) {
|
||
long chunkOffset = writtenChunkOffsets.get(i);
|
||
checkState(chunkOffset <= UNSIGNED_INT_MAX_VALUE, "Only 32-bit chunk offset is allowed");
|
||
contents.putInt((int) chunkOffset); // chunk_offset: unsigned int(32)
|
||
}
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("stco", contents);
|
||
}
|
||
|
||
/** Returns the co64 (64-bit chunk offset) box. */
|
||
public static ByteBuffer co64(List<Long> writtenChunkOffsets) {
|
||
ByteBuffer contents =
|
||
ByteBuffer.allocate(
|
||
2 * BYTES_PER_INTEGER + 2 * writtenChunkOffsets.size() * BYTES_PER_INTEGER);
|
||
|
||
contents.putInt(0x0); // version and flags
|
||
contents.putInt(writtenChunkOffsets.size()); // entry_count: unsigned int(32)
|
||
|
||
for (int i = 0; i < writtenChunkOffsets.size(); i++) {
|
||
contents.putLong(writtenChunkOffsets.get(i)); // chunk_offset: unsigned int(64)
|
||
}
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("co64", contents);
|
||
}
|
||
|
||
/** Returns the stss (sync sample) box. */
|
||
public static ByteBuffer stss(List<MediaCodec.BufferInfo> writtenSamples) {
|
||
ByteBuffer contents = ByteBuffer.allocate(writtenSamples.size() * 4 + MAX_FIXED_LEAF_BOX_SIZE);
|
||
|
||
contents.putInt(0x0); // version and flags
|
||
|
||
// Total entry count is known only after processing all sample, so put in a placeholder
|
||
// for total entry count and store its index.
|
||
int totalEntryCountIndex = contents.position();
|
||
contents.putInt(writtenSamples.size()); // entry_count
|
||
|
||
int currentSampleNumber = 1;
|
||
int totalKeyFrames = 0;
|
||
for (int i = 0; i < writtenSamples.size(); i++) {
|
||
MediaCodec.BufferInfo info = writtenSamples.get(i);
|
||
if ((info.flags & MediaCodec.BUFFER_FLAG_KEY_FRAME) > 0) {
|
||
contents.putInt(currentSampleNumber);
|
||
totalKeyFrames++;
|
||
}
|
||
|
||
currentSampleNumber++;
|
||
}
|
||
|
||
contents.putInt(totalEntryCountIndex, totalKeyFrames);
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("stss", contents);
|
||
}
|
||
|
||
/** Returns the stsd (sample description) box. */
|
||
public static ByteBuffer stsd(ByteBuffer sampleEntryBox) {
|
||
ByteBuffer contents = ByteBuffer.allocate(sampleEntryBox.limit() + MAX_FIXED_LEAF_BOX_SIZE);
|
||
|
||
contents.putInt(0x0); // version and flags
|
||
contents.putInt(1); // entry_count: there is only one sample description in each track.
|
||
contents.put(sampleEntryBox);
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("stsd", contents);
|
||
}
|
||
|
||
/** Returns the stbl (sample table) box. */
|
||
public static ByteBuffer stbl(ByteBuffer... subBoxes) {
|
||
return BoxUtils.wrapBoxesIntoBox("stbl", Arrays.asList(subBoxes));
|
||
}
|
||
|
||
/** Creates the ftyp box. */
|
||
public static ByteBuffer ftyp() {
|
||
List<ByteBuffer> boxBytes = new ArrayList<>();
|
||
|
||
String majorVersion = "isom";
|
||
boxBytes.add(ByteBuffer.wrap(Util.getUtf8Bytes(majorVersion)));
|
||
|
||
int minorVersion = 0x020000;
|
||
ByteBuffer minorBytes = ByteBuffer.allocate(4);
|
||
minorBytes.putInt(minorVersion);
|
||
minorBytes.flip();
|
||
boxBytes.add(minorBytes);
|
||
|
||
String[] compatibleBrands = {"isom", "iso2", "mp41"};
|
||
for (String compatibleBrand : compatibleBrands) {
|
||
boxBytes.add(ByteBuffer.wrap(Util.getUtf8Bytes(compatibleBrand)));
|
||
}
|
||
|
||
return BoxUtils.wrapBoxesIntoBox("ftyp", boxBytes);
|
||
}
|
||
|
||
/** Returns the movie fragment (moof) box. */
|
||
public static ByteBuffer moof(ByteBuffer mfhdBox, List<ByteBuffer> trafBoxes) {
|
||
return BoxUtils.wrapBoxesIntoBox(
|
||
"moof", new ImmutableList.Builder<ByteBuffer>().add(mfhdBox).addAll(trafBoxes).build());
|
||
}
|
||
|
||
/** Returns the movie fragment header (mfhd) box. */
|
||
public static ByteBuffer mfhd(int sequenceNumber) {
|
||
ByteBuffer contents = ByteBuffer.allocate(MFHD_BOX_CONTENT_SIZE);
|
||
contents.putInt(0x0); // version and flags
|
||
contents.putInt(sequenceNumber); // An unsigned int(32)
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("mfhd", contents);
|
||
}
|
||
|
||
/** Returns a track fragment (traf) box. */
|
||
public static ByteBuffer traf(ByteBuffer tfhdBox, ByteBuffer trunBox) {
|
||
return BoxUtils.wrapBoxesIntoBox("traf", ImmutableList.of(tfhdBox, trunBox));
|
||
}
|
||
|
||
/** Returns a track fragment header (tfhd) box. */
|
||
public static ByteBuffer tfhd(int trackId, long baseDataOffset) {
|
||
ByteBuffer contents = ByteBuffer.allocate(TFHD_BOX_CONTENT_SIZE);
|
||
// 0x000001 base-data-offset-present: indicates the presence of the base-data-offset field.
|
||
contents.putInt(0x0 | 0x000001); // version and flags
|
||
contents.putInt(trackId);
|
||
contents.putLong(baseDataOffset);
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("tfhd", contents);
|
||
}
|
||
|
||
/** Returns a track fragment run (trun) box. */
public static ByteBuffer trun(
    List<SampleMetadata> samplesMetadata, int dataOffset, boolean hasBFrame) {
  ByteBuffer contents =
      ByteBuffer.allocate(getTrunBoxContentSize(samplesMetadata.size(), hasBFrame));

  // 0x000001 data-offset-present.
  // 0x000100 sample-duration-present: indicates that each sample has its own duration, otherwise
  // the default is used.
  // 0x000200 sample-size-present: indicates that each sample has its own size, otherwise the
  // default is used.
  // 0x000400 sample-flags-present: indicates that each sample has its own flags, otherwise the
  // default is used.
  // 0x000800 sample-composition-time-offsets-present: indicates that each sample has its own
  // composition time offset, otherwise default is used.
  // Version (the most significant byte of versionAndFlags) is 0x1.
  int versionAndFlags = 0x1 << 24 | 0x000001 | 0x000100 | 0x000200 | 0x000400;
  if (hasBFrame) {
    versionAndFlags |= 0x000800;
  }
  contents.putInt(versionAndFlags);
  contents.putInt(samplesMetadata.size()); // sample_count: an unsigned int(32)
  contents.putInt(dataOffset); // data_offset: a signed int(32)
  for (int i = 0; i < samplesMetadata.size(); i++) {
    SampleMetadata currentSampleMetadata = samplesMetadata.get(i);
    contents.putInt(currentSampleMetadata.durationVu); // sample_duration: an unsigned int(32)
    contents.putInt(currentSampleMetadata.size); // sample_size: an unsigned int(32)
    // sample_flags: chosen by key-frame status of the sample.
    contents.putInt(
        (currentSampleMetadata.flags & MediaCodec.BUFFER_FLAG_KEY_FRAME) != 0
            ? TRUN_BOX_SYNC_SAMPLE_FLAGS
            : TRUN_BOX_NON_SYNC_SAMPLE_FLAGS);
    if (hasBFrame) {
      // sample_composition_time_offset: only written when the 0x000800 flag is set above.
      contents.putInt(currentSampleMetadata.compositionTimeOffsetVu);
    }
  }
  contents.flip();
  return BoxUtils.wrapIntoBox("trun", contents);
}
|
||
|
||
/** Returns the size required for {@link #trun(List, int, boolean)} box content. */
|
||
public static int getTrunBoxContentSize(int sampleCount, boolean hasBFrame) {
|
||
int trunBoxFixedSize = 3 * BYTES_PER_INTEGER;
|
||
int intWrittenPerSample = hasBFrame ? 4 : 3;
|
||
return trunBoxFixedSize + intWrittenPerSample * sampleCount * BYTES_PER_INTEGER;
|
||
}
|
||
|
||
/** Returns a movie extends (mvex) box. */
public static ByteBuffer mvex(List<ByteBuffer> trexBoxes) {
  // The mvex box is a pure container holding one trex box per track.
  return BoxUtils.wrapBoxesIntoBox("mvex", trexBoxes);
}
|
||
|
||
/** Returns a track extends (trex) box. */
|
||
public static ByteBuffer trex(int trackId) {
|
||
ByteBuffer contents = ByteBuffer.allocate(6 * BYTES_PER_INTEGER);
|
||
contents.putInt(0x0); // version and flags
|
||
contents.putInt(trackId);
|
||
contents.putInt(1); // default_sample_description_index
|
||
contents.putInt(0); // default_sample_duration
|
||
contents.putInt(0); // default_sample_size
|
||
contents.putInt(0); // default_sample_flags
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("trex", contents);
|
||
}
|
||
|
||
/** Returns the edvd box header. */
|
||
public static ByteBuffer getEdvdBoxHeader(long payloadSize) {
|
||
ByteBuffer edvdBoxHeader = ByteBuffer.allocate(LARGE_SIZE_BOX_HEADER_SIZE);
|
||
edvdBoxHeader.putInt(1); // indicating a 64-bit length field
|
||
edvdBoxHeader.put(Util.getUtf8Bytes("edvd"));
|
||
edvdBoxHeader.putLong(LARGE_SIZE_BOX_HEADER_SIZE + payloadSize); // the actual length
|
||
edvdBoxHeader.flip();
|
||
return edvdBoxHeader;
|
||
}
|
||
|
||
/** Returns an ISO 639-2/T (ISO3) language code for the IETF BCP 47 language tag. */
|
||
private static @PolyNull String bcp47LanguageTagToIso3(@PolyNull String languageTag) {
|
||
if (languageTag == null) {
|
||
return null;
|
||
}
|
||
|
||
Locale locale = Locale.forLanguageTag(languageTag);
|
||
|
||
return locale.getISO3Language().isEmpty() ? languageTag : locale.getISO3Language();
|
||
}
|
||
|
||
/** Converts video units to microseconds, using the provided timebase. */
private static long usFromVu(long timestampVu, long videoUnitTimebase) {
  // HALF_UP rounds to the nearest microsecond (ties away from zero), avoiding the cumulative
  // truncation error plain integer division would introduce when scaling timestamps.
  return Util.scaleLargeValue(
      timestampVu, C.MICROS_PER_SECOND, videoUnitTimebase, RoundingMode.HALF_UP);
}
|
||
|
||
/** Returns the duration of the last sample (in video units). */
|
||
private static int getLastSampleDurationVu(
|
||
List<Integer> sampleDurationsExceptLast,
|
||
@Mp4Muxer.LastSampleDurationBehavior int lastSampleDurationBehavior,
|
||
int lastSampleDurationVuFromEndOfStream) {
|
||
switch (lastSampleDurationBehavior) {
|
||
case Mp4Muxer.LAST_SAMPLE_DURATION_BEHAVIOR_SET_TO_ZERO:
|
||
return 0;
|
||
case Mp4Muxer
|
||
.LAST_SAMPLE_DURATION_BEHAVIOR_SET_FROM_END_OF_STREAM_BUFFER_OR_DUPLICATE_PREVIOUS:
|
||
if (lastSampleDurationVuFromEndOfStream != C.LENGTH_UNSET) {
|
||
return lastSampleDurationVuFromEndOfStream;
|
||
}
|
||
// For a track having less than 3 samples, duplicating the last frame duration will
|
||
// significantly increase the overall track duration, so avoid that.
|
||
return sampleDurationsExceptLast.size() < 2
|
||
? 0
|
||
: Iterables.getLast(sampleDurationsExceptLast);
|
||
default:
|
||
throw new IllegalArgumentException(
|
||
"Unexpected value for the last frame duration behavior " + lastSampleDurationBehavior);
|
||
}
|
||
}
|
||
|
||
/** Returns the d263Box box as per 3GPP ETSI TS 126 244: 6.8. */
|
||
private static ByteBuffer d263Box(Format format) {
|
||
ByteBuffer d263Box = ByteBuffer.allocate(7);
|
||
d263Box.put(" ".getBytes(UTF_8)); // 4 spaces (vendor)
|
||
d263Box.put((byte) 0x00); // decoder version
|
||
Pair<Integer, Integer> profileAndLevel = CodecSpecificDataUtil.getCodecProfileAndLevel(format);
|
||
if (profileAndLevel == null) {
|
||
profileAndLevel =
|
||
new Pair<>(
|
||
MediaCodecInfo.CodecProfileLevel.H263ProfileBaseline,
|
||
MediaCodecInfo.CodecProfileLevel.H263Level10);
|
||
}
|
||
d263Box.put(profileAndLevel.second.byteValue()); // level
|
||
d263Box.put(profileAndLevel.first.byteValue()); // profile
|
||
|
||
d263Box.flip();
|
||
return BoxUtils.wrapIntoBox("d263", d263Box);
|
||
}
|
||
|
||
/** Returns the avcC box as per ISO/IEC 14496-15: 5.3.3.1.2. */
private static ByteBuffer avcCBox(Format format) {
  // csd-0 carries the SPS and csd-1 carries the PPS, both in Annex-B framing.
  checkArgument(
      format.initializationData.size() >= 2, "csd-0 and/or csd-1 not found in the format.");

  byte[] csd0 = format.initializationData.get(0);
  checkArgument(csd0.length > 0, "csd-0 is empty.");

  byte[] csd1 = format.initializationData.get(1);
  checkArgument(csd1.length > 0, "csd-1 is empty.");

  ByteBuffer csd0ByteBuffer = ByteBuffer.wrap(csd0);
  ByteBuffer csd1ByteBuffer = ByteBuffer.wrap(csd1);

  ByteBuffer contents =
      ByteBuffer.allocate(
          csd0ByteBuffer.limit() + csd1ByteBuffer.limit() + MAX_FIXED_LEAF_BOX_SIZE);

  contents.put((byte) 0x01); // configurationVersion

  ImmutableList<ByteBuffer> csd0NalUnits = AnnexBUtils.findNalUnits(csd0ByteBuffer);
  checkArgument(csd0NalUnits.size() == 1, "SPS data not found in csd0.");

  ByteBuffer sps = csd0NalUnits.get(0);
  byte[] spsData = new byte[sps.remaining()];
  sps.get(spsData);
  sps.rewind(); // Reset so the same buffer can also be written into the box below.

  // The profile/level bytes in avcC must mirror the values signalled inside the SPS itself.
  NalUnitUtil.SpsData h264SpsData =
      NalUnitUtil.parseSpsNalUnit(spsData, /* nalOffset= */ 0, spsData.length);
  contents.put((byte) h264SpsData.profileIdc); // AVCProfileIndication
  contents.put((byte) h264SpsData.constraintsFlagsAndReservedZero2Bits); // profile_compatibility
  contents.put((byte) h264SpsData.levelIdc); // AVCLevelIndication

  contents.put((byte) 0xFF); // 6 bits reserved ('0b111111') + 2 bits lengthSizeMinusOne (3)
  contents.put((byte) 0xE1); // 3 bits reserved ('0b111') + 5 bits numOfSequenceParameterSets (1)
  contents.putShort((short) sps.remaining()); // sequenceParameterSetLength
  contents.put(sps); // sequenceParameterSetNALUnit
  sps.rewind();

  ImmutableList<ByteBuffer> csd1NalUnits = AnnexBUtils.findNalUnits(csd1ByteBuffer);
  // NOTE(review): checkState here vs. checkArgument for csd0 above — the exception types
  // differ (IllegalStateException vs. IllegalArgumentException); confirm this is intentional.
  checkState(csd1NalUnits.size() == 1, "PPS data not found in csd1.");

  contents.put((byte) 0x01); // numOfPictureParameterSets

  ByteBuffer pps = csd1NalUnits.get(0);
  contents.putShort((short) pps.remaining()); // pictureParameterSetLength
  contents.put(pps); // pictureParameterSetNALUnit
  pps.rewind();

  contents.flip();
  return BoxUtils.wrapIntoBox("avcC", contents);
}
|
||
|
||
/** Returns the hvcC box as per ISO/IEC 14496-15: 8.3.3.1.2. */
private static ByteBuffer hvcCBox(Format format) {
  // For H.265, all three codec-specific NALUs (VPS, SPS, PPS) are packed into csd-0.
  checkArgument(!format.initializationData.isEmpty(), "csd-0 not found in the format.");

  byte[] csd0 = format.initializationData.get(0);
  checkArgument(csd0.length > 0, "csd-0 is empty.");

  ByteBuffer csd0ByteBuffer = ByteBuffer.wrap(csd0);

  ByteBuffer contents = ByteBuffer.allocate(csd0ByteBuffer.limit() + MAX_FIXED_LEAF_BOX_SIZE);

  ImmutableList<ByteBuffer> nalusWithEmulationPrevention =
      AnnexBUtils.findNalUnits(csd0ByteBuffer);

  // Remove emulation prevention bytes to parse the actual csd-0 data.
  // For storing the csd-0 data into MP4 file, use original NALUs with emulation prevention bytes.
  List<ByteBuffer> nalusWithoutEmulationPrevention = new ArrayList<>();
  for (int i = 0; i < nalusWithEmulationPrevention.size(); i++) {
    nalusWithoutEmulationPrevention.add(
        AnnexBUtils.stripEmulationPrevention(nalusWithEmulationPrevention.get(i)));
  }

  contents.put((byte) 0x01); // configurationVersion

  // Assuming that VPS, SPS and PPS are in this order in csd-0.
  ByteBuffer vps = nalusWithoutEmulationPrevention.get(0);

  // 0x40 is NAL unit type 32 (VPS) in the upper 6 bits of the first NAL header byte.
  if (vps.get(vps.position()) != 0x40) {
    throw new IllegalArgumentException("First NALU in csd-0 is not the VPS.");
  }

  // NOTE(review): the absolute offsets 6/7/11/15/17 below assume a fixed layout of the VPS
  // (2-byte NAL header plus fixed leading fields before profile_tier_level) — confirm against
  // the H.265 VPS syntax.
  // general_profile_space (2 bits) + general_tier_flag (1 bit) + general_profile_idc (5 bits)
  contents.put(vps.get(6));

  contents.putInt(vps.getInt(7)); // general_profile_compatibility_flags

  // general_constraint_indicator_flags (6 bytes)
  contents.putInt(vps.getInt(11));
  contents.putShort(vps.getShort(15));

  contents.put(vps.get(17)); // general_level_idc

  // First 4 bits reserved + min_spatial_segmentation_idc (12 bits)
  contents.putShort((short) 0xF000);

  // First 6 bits reserved + parallelismType (2 bits)
  contents.put((byte) 0xFC);

  ByteBuffer sps = nalusWithEmulationPrevention.get(1);
  byte[] spsArray = new byte[sps.remaining()];
  sps.get(spsArray);
  sps.rewind(); // Reset so the same buffer can also be written into the NALU array below.

  NalUnitUtil.H265SpsData h265SpsData =
      NalUnitUtil.parseH265SpsNalUnit(
          spsArray, /* nalOffset= */ 0, /* nalLimit= */ spsArray.length, /* vpsData= */ null);

  byte chromaFormat = (byte) (0xFC | h265SpsData.chromaFormatIdc); // First 6 bits reserved
  byte bitDepthLumaMinus8 =
      (byte) (0xF8 | h265SpsData.bitDepthLumaMinus8); // First 5 bits reserved
  byte bitDepthChromaMinus8 =
      (byte) (0xF8 | h265SpsData.bitDepthChromaMinus8); // First 5 bits reserved
  contents.put(chromaFormat);
  contents.put(bitDepthLumaMinus8);
  contents.put(bitDepthChromaMinus8);

  // avgFrameRate: value 0 indicates an unspecified average frame rate.
  contents.putShort((short) 0);

  // constantFrameRate (2 bits) + numTemporalLayers (3 bits) + temporalIdNested (1 bit) +
  // lengthSizeMinusOne (2 bits)
  contents.put((byte) 0x0F);

  // Put all NALUs.
  contents.put((byte) nalusWithEmulationPrevention.size()); // numOfArrays

  for (int i = 0; i < nalusWithEmulationPrevention.size(); i++) {
    ByteBuffer nalu = nalusWithEmulationPrevention.get(i);

    // array_completeness (1 bit) + reserved (1 bit) + NAL_unit_type (6 bits)
    byte naluType = (byte) ((nalu.get(0) >> 1) & 0x3F);
    contents.put(naluType);

    contents.putShort((short) 1); // numNalus; number of NALUs in array
    contents.putShort((short) nalu.limit()); // nalUnitLength
    contents.put(nalu);
  }

  contents.flip();
  return BoxUtils.wrapIntoBox("hvcC", contents);
}
|
||
|
||
/** Returns the av1C box. */
|
||
private static ByteBuffer av1CBox(Format format) {
|
||
// For AV1, the entire codec-specific box is packed into csd-0.
|
||
checkArgument(!format.initializationData.isEmpty(), "csd-0 is not found in the format");
|
||
|
||
byte[] csd0 = format.initializationData.get(0);
|
||
checkArgument(csd0.length > 0, "csd-0 is empty.");
|
||
|
||
return BoxUtils.wrapIntoBox("av1C", ByteBuffer.wrap(csd0));
|
||
}
|
||
|
||
/** Returns the vpcC box as per VP Codec ISO Media File Format Binding v1.0. */
private static ByteBuffer vpcCBox(Format format) {
  // For VP9, the CodecPrivate or vpcCBox data is packed into csd-0.
  checkArgument(!format.initializationData.isEmpty(), "csd-0 is not found in the format");
  byte[] csd0 = format.initializationData.get(0);
  checkArgument(csd0.length > 3, "csd-0 for vp9 is invalid.");
  int versionAndFlags = 1 << 24; // version (value 1, 8 bits) + flag (value 0, 24 bits)
  if (Ints.fromByteArray(csd0) == versionAndFlags) {
    // CSD is already in vpcC format.
    return BoxUtils.wrapIntoBox("vpcC", ByteBuffer.wrap(csd0));
  }

  ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE);

  contents.putInt(versionAndFlags);
  // Default value of videoRange is 0.
  int videoRange = format.colorInfo != null ? format.colorInfo.colorRange : 0;
  // Repack the WebM CodecPrivate features (profile/level/bitDepth/chromaSubsampling) into the
  // 3-byte vpcC layout.
  ByteBuffer codecPrivateContent = parseVp9CodecPrivateFromCsd(csd0, videoRange);
  contents.put(codecPrivateContent);

  // The default values for optional fields as per the : <a
  // href="https://www.webmproject.org/vp9/mp4/#optional-fields">Vp9 webm spec</a>
  int colourPrimaries = 1;
  int transferCharacteristics = 1;
  int matrixCoefficients = 1;

  if (format.colorInfo != null) {
    // NOTE(review): videoRange here holds colorInfo.colorRange, but the lookup table is named
    // for the color *standard* (colorSpace) — confirm this index is intentional (compare with
    // colrBox, which indexes the same table with colorSpace).
    colourPrimaries = MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX.get(videoRange).get(0);
    transferCharacteristics =
        MEDIAFORMAT_TRANSFER_TO_MP4_TRANSFER.get(format.colorInfo.colorTransfer);
    matrixCoefficients = MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX.get(videoRange).get(1);
  }

  contents.put((byte) colourPrimaries);
  contents.put((byte) transferCharacteristics);
  contents.put((byte) matrixCoefficients);
  contents.putShort((short) 0); // codecInitializationDataSize must be 0 for VP9
  // codecInitializationData is not used for VP9 so skipped writing to contents
  contents.flip();
  return BoxUtils.wrapIntoBox("vpcC", contents);
}
|
||
|
||
/**
|
||
* Parses a Vp9 CodecPrivate as per <a
|
||
* href="https://www.webmproject.org/docs/container/#vp9-codec-feature-metadata-codecprivate">Vp9
|
||
* spec</a>
|
||
*/
|
||
private static ByteBuffer parseVp9CodecPrivateFromCsd(byte[] csd0, int videoFullRange) {
|
||
// The default values.
|
||
byte profile = 0;
|
||
byte level = 10;
|
||
byte bitDepth = 8;
|
||
byte chromaSubsampling = 0;
|
||
// Each feature is defined by the binary format of ID (1 byte), length (1 byte), and data (1
|
||
// byte).
|
||
for (int i = 0; i < csd0.length; i += 3) {
|
||
int id = csd0[i];
|
||
int dataIndex = i + 2;
|
||
switch (id) {
|
||
case 1:
|
||
profile = csd0[dataIndex];
|
||
break;
|
||
case 2:
|
||
level = csd0[dataIndex];
|
||
break;
|
||
case 3:
|
||
bitDepth = csd0[dataIndex];
|
||
break;
|
||
case 4:
|
||
chromaSubsampling = csd0[dataIndex];
|
||
break;
|
||
default:
|
||
break;
|
||
}
|
||
}
|
||
ByteBuffer content = ByteBuffer.allocate(3);
|
||
content.put(profile);
|
||
content.put(level);
|
||
// 4 bits of bitDepth + 3 bits of chromaSubsampling + 1 bit of videoRange
|
||
byte combined = (byte) ((bitDepth << 4) | (chromaSubsampling << 1) | videoFullRange);
|
||
content.put(combined);
|
||
content.flip();
|
||
return content;
|
||
}
|
||
|
||
/**
|
||
* Returns smDm box as per <a
|
||
* href="https://www.webmproject.org/vp9/mp4/#smpte-2086-mastering-display-metadata-box ">SmDm box
|
||
* in Vp9 spec</a>
|
||
*/
|
||
private static ByteBuffer smDmBox(ColorInfo colorInfo) {
|
||
byte[] hdrStaticInfo = colorInfo.hdrStaticInfo;
|
||
if (hdrStaticInfo != null) {
|
||
ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE);
|
||
contents.putInt(0x0); // version and flag
|
||
contents.put(hdrStaticInfo);
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("SmDm", contents);
|
||
} else {
|
||
// No HDR info
|
||
return ByteBuffer.allocate(0);
|
||
}
|
||
}
|
||
|
||
/** Returns the pasp box. */
|
||
private static ByteBuffer paspBox() {
|
||
ByteBuffer contents = ByteBuffer.allocate(8);
|
||
|
||
contents.putInt(1 << 16); // hspacing
|
||
contents.putInt(1 << 16); // vspacing
|
||
|
||
contents.rewind();
|
||
return BoxUtils.wrapIntoBox("pasp", contents);
|
||
}
|
||
|
||
/** Returns the colr box. */
|
||
@SuppressWarnings("InlinedApi")
|
||
private static ByteBuffer colrBox(ColorInfo colorInfo) {
|
||
ByteBuffer contents = ByteBuffer.allocate(20);
|
||
contents.put((byte) 'n');
|
||
contents.put((byte) 'c');
|
||
contents.put((byte) 'l');
|
||
contents.put((byte) 'x');
|
||
|
||
short primaries = 0;
|
||
short transfer = 0;
|
||
short matrix = 0;
|
||
byte range = 0;
|
||
|
||
if (colorInfo.colorSpace != Format.NO_VALUE) {
|
||
int standard = colorInfo.colorSpace;
|
||
if (standard < 0 || standard >= MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX.size()) {
|
||
throw new IllegalArgumentException("Color standard not implemented: " + standard);
|
||
}
|
||
|
||
primaries = MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX.get(standard).get(0);
|
||
matrix = MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX.get(standard).get(1);
|
||
}
|
||
|
||
if (colorInfo.colorTransfer != Format.NO_VALUE) {
|
||
int transferInFormat = colorInfo.colorTransfer;
|
||
if (transferInFormat < 0 || transferInFormat >= MEDIAFORMAT_TRANSFER_TO_MP4_TRANSFER.size()) {
|
||
throw new IllegalArgumentException("Color transfer not implemented: " + transferInFormat);
|
||
}
|
||
|
||
transfer = MEDIAFORMAT_TRANSFER_TO_MP4_TRANSFER.get(transferInFormat);
|
||
}
|
||
|
||
if (colorInfo.colorRange != Format.NO_VALUE) {
|
||
int rangeInFormat = colorInfo.colorRange;
|
||
// Handled values are 0 (unknown), 1 (full) and 2 (limited).
|
||
if (rangeInFormat < 0 || rangeInFormat > 2) {
|
||
throw new IllegalArgumentException("Color range not implemented: " + rangeInFormat);
|
||
}
|
||
|
||
// Set this to 0x80 only for full range, 0 otherwise.
|
||
range = rangeInFormat == C.COLOR_RANGE_FULL ? (byte) 0x80 : 0;
|
||
}
|
||
|
||
contents.putShort(primaries);
|
||
contents.putShort(transfer);
|
||
contents.putShort(matrix);
|
||
contents.put(range);
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("colr", contents);
|
||
}
|
||
|
||
/** Returns codec specific fourcc. */
|
||
private static String codecSpecificFourcc(Format format) {
|
||
String mimeType = checkNotNull(format.sampleMimeType);
|
||
switch (mimeType) {
|
||
case MimeTypes.AUDIO_AAC:
|
||
case MimeTypes.AUDIO_VORBIS:
|
||
return "mp4a";
|
||
case MimeTypes.AUDIO_AMR_NB:
|
||
return "samr";
|
||
case MimeTypes.AUDIO_AMR_WB:
|
||
return "sawb";
|
||
case MimeTypes.VIDEO_H263:
|
||
return "s263";
|
||
case MimeTypes.AUDIO_OPUS:
|
||
return "Opus";
|
||
case MimeTypes.VIDEO_H264:
|
||
return "avc1";
|
||
case MimeTypes.VIDEO_H265:
|
||
return "hvc1";
|
||
case MimeTypes.VIDEO_AV1:
|
||
return "av01";
|
||
case MimeTypes.VIDEO_MP4V:
|
||
return "mp4v-es";
|
||
case MimeTypes.VIDEO_VP9:
|
||
return "vp09";
|
||
default:
|
||
throw new IllegalArgumentException("Unsupported format: " + mimeType);
|
||
}
|
||
}
|
||
|
||
/** Returns the esds box. */
private static ByteBuffer esdsBox(Format format) {
  checkArgument(!format.initializationData.isEmpty(), "csd-0 not found in the format.");

  byte[] csd0 = format.initializationData.get(0);
  checkArgument(csd0.length > 0, "csd-0 is empty.");

  String mimeType = checkNotNull(format.sampleMimeType);
  boolean isVorbis = mimeType.equals(MimeTypes.AUDIO_VORBIS);
  // Vorbis CSD needs repacking before it can be used as DecoderSpecificInfo.
  ByteBuffer csdByteBuffer =
      isVorbis ? getVorbisInitializationData(format) : ByteBuffer.wrap(csd0);

  int peakBitrate = format.peakBitrate;
  int averageBitrate = format.averageBitrate;
  boolean isVideo = MimeTypes.isVideo(mimeType);

  // Each descriptor's variable-length size field covers everything after it; the constants 14
  // and 21 account for the fixed fields and nested descriptor headers within DecoderConfigDescr
  // and ES_Descr respectively.
  int csdSize = csdByteBuffer.remaining();
  ByteBuffer dsiSizeBuffer = getSizeBuffer(csdSize);
  ByteBuffer dcdSizeBuffer = getSizeBuffer(csdSize + dsiSizeBuffer.remaining() + 14);
  ByteBuffer esdSizeBuffer =
      getSizeBuffer(csdSize + dsiSizeBuffer.remaining() + dcdSizeBuffer.remaining() + 21);

  ByteBuffer contents = ByteBuffer.allocate(csdSize + MAX_FIXED_LEAF_BOX_SIZE);
  contents.putInt(0x0); // version and flags
  contents.put((byte) 0x03); // ES_DescrTag

  contents.put(esdSizeBuffer);

  contents.putShort((short) 0x0000); // ES_ID
  // streamDependenceFlag (1 bit) + URL_Flag (1 bit) + OCRstreamFlag (1 bit) + streamPriority (5
  // bits)
  contents.put(isVideo ? (byte) 0x1f : (byte) 0x0);

  contents.put((byte) 0x04); // DecoderConfigDescrTag
  contents.put(dcdSizeBuffer);

  Byte objectType = checkNotNull(MimeTypes.getMp4ObjectTypeFromMimeType(mimeType));
  contents.put(objectType); // objectTypeIndication

  // streamType (6 bits) + upStream (1 bit) + reserved = 1 (1 bit)
  contents.put((byte) ((isVideo ? (0x04 << 2) : (0x05 << 2)) | 0x01));

  // bufferSizeDB is a 24-bit field written as a 16-bit high part and an 8-bit low part.
  int size = isVideo ? 0x017700 : 0x000300;
  contents.putShort((short) ((size >> 8) & 0xFFFF)); // First 16 bits of buffer size.
  contents.put((byte) 0x0); // Last 8 bits of buffer size.

  // Unset bitrates are written as 0 ("unknown").
  contents.putInt(peakBitrate != Format.NO_VALUE ? peakBitrate : 0);
  contents.putInt(averageBitrate != Format.NO_VALUE ? averageBitrate : 0);

  contents.put((byte) 0x05); // DecoderSpecificInfoTag
  contents.put(dsiSizeBuffer);
  contents.put(csdByteBuffer);
  csdByteBuffer.rewind(); // Leave the CSD buffer readable for any subsequent use.

  contents.put((byte) 0x06); // SLConfigDescriptorTag
  contents.put((byte) 0x01); // size
  contents.put((byte) 0x02); // predefined = 2 (as used in MP4 files)

  contents.flip();
  return BoxUtils.wrapIntoBox("esds", contents);
}
|
||
|
||
private static ByteBuffer getSizeBuffer(int length) {
|
||
int prefix = 0;
|
||
ArrayDeque<Byte> esdsSizeBytes = new ArrayDeque<>();
|
||
do {
|
||
esdsSizeBytes.push((byte) (prefix | (length & 0x7F)));
|
||
length >>= 7;
|
||
prefix = 0x80;
|
||
} while (length > 0);
|
||
|
||
ByteBuffer sizeBuffer = ByteBuffer.allocate(esdsSizeBytes.size());
|
||
while (!esdsSizeBytes.isEmpty()) {
|
||
sizeBuffer.put(esdsSizeBytes.removeFirst());
|
||
}
|
||
sizeBuffer.flip();
|
||
return sizeBuffer;
|
||
}
|
||
|
||
/* Returns csd wrapped in ByteBuffer in vorbis codec initialization data format. */
|
||
private static ByteBuffer getVorbisInitializationData(Format format) {
|
||
checkArgument(
|
||
format.initializationData.size() > 1, "csd-1 should contain setup header for Vorbis.");
|
||
byte[] csd0 = format.initializationData.get(0); // identification Header
|
||
|
||
// csd0Size is represented using "Xiph lacing" style.
|
||
// The lacing size is split into 255 values, stored as unsigned octets – for example, 500 is
|
||
// coded 255;245 or [0xFF 0xF5]. A frame with a size multiple of 255 is coded with a 0 at the
|
||
// end of the size – for example, 765 is coded 255;255;255;0 or [0xFF 0xFF 0xFF 0x00].
|
||
byte[] csd0Size = new byte[csd0.length / 255 + 1];
|
||
Arrays.fill(csd0Size, (byte) 0xFF);
|
||
csd0Size[csd0Size.length - 1] = (byte) (csd0.length % 255);
|
||
|
||
byte[] csd1 = format.initializationData.get(1); // setUp Header
|
||
checkArgument(csd1.length > 0, "csd-1 should be present and contain setup header for Vorbis.");
|
||
|
||
// Add 2 bytes - 1 for Vorbis audio and 1 for comment header length.
|
||
ByteBuffer csd = ByteBuffer.allocate(csd0Size.length + csd0.length + csd1.length + 2);
|
||
csd.put((byte) 0x02); // Vorbis audio
|
||
csd.put(csd0Size); // Size of identification header
|
||
csd.put((byte) 0); // Length of comment header
|
||
csd.put(csd0);
|
||
csd.put(csd1);
|
||
csd.flip();
|
||
|
||
return csd;
|
||
}
|
||
|
||
/** Returns the audio damr box. */
|
||
private static ByteBuffer damrBox(short mode) {
|
||
|
||
ByteBuffer contents = ByteBuffer.allocate(MAX_FIXED_LEAF_BOX_SIZE);
|
||
|
||
contents.put(" ".getBytes(UTF_8)); // vendor: 4 bytes
|
||
contents.put((byte) 0); // decoder version
|
||
contents.putShort(mode);
|
||
contents.put((byte) 0); // mode change period
|
||
contents.put((byte) 1); // frames per sample
|
||
|
||
contents.flip();
|
||
return BoxUtils.wrapIntoBox("damr", contents);
|
||
}
|
||
|
||
  /**
   * Returns the audio dOps box for Opus codec as per RFC-7845: 5.1.
   *
   * <p>Copies csd-0 verbatim after stripping the leading 8-byte "OpusHead" magic signature.
   *
   * <p>NOTE(review): multi-byte OpusHead fields (pre-skip, input sample rate, output gain) are
   * little-endian in csd-0, while ISO-BMFF box fields are conventionally big-endian - confirm
   * whether a verbatim copy (as done here) or a byte-swapped copy is intended.
   *
   * @throws IllegalArgumentException if csd-0 is absent or shorter than the 8-byte header.
   */
  private static ByteBuffer dOpsBox(Format format) {
    checkArgument(!format.initializationData.isEmpty());

    int opusHeaderLength = 8;
    byte[] csd0 = format.initializationData.get(0);
    checkArgument(
        csd0.length >= opusHeaderLength,
        "As csd0 contains 'OpusHead' in first 8 bytes, csd0 length should be greater than 8");
    // Allocated slightly larger than needed (full csd0 length); flip() trims to what was written.
    ByteBuffer contents = ByteBuffer.allocate(csd0.length);
    // Skip 8 bytes containing "OpusHead".
    contents.put(
        /* src */ csd0, /* offset */ opusHeaderLength, /* length */ csd0.length - opusHeaderLength);
    contents.flip();

    return BoxUtils.wrapIntoBox("dOps", contents);
  }
|
||
|
||
/** Packs a three-letter language code into a short, packing 3x5 bits. */
|
||
private static short languageCodeFromString(@Nullable String code) {
|
||
if (code == null) {
|
||
return 0;
|
||
}
|
||
|
||
byte[] bytes = Util.getUtf8Bytes(code);
|
||
|
||
if (bytes.length != 3) {
|
||
throw new IllegalArgumentException("Non-length-3 language code: " + code);
|
||
}
|
||
|
||
// Take only last 5 bits of each letter.
|
||
int value = (bytes[2] & 0x1F);
|
||
value += (bytes[1] & 0x1F) << 5;
|
||
value += (bytes[0] & 0x1F) << 10;
|
||
|
||
// Total 15 bits for the language code and the 16th bit should be 0.
|
||
return (short) (value & 0x7FFF);
|
||
}
|
||
|
||
/**
|
||
* Generates an orientation matrix, to be included in the MP4 header.
|
||
*
|
||
* <p>The supported values are 0, 90, 180 and 270 (degrees).
|
||
*/
|
||
private static byte[] rotationMatrixFromOrientation(int orientation) {
|
||
// The transformation matrix is defined as below:
|
||
// | a b u |
|
||
// | c d v |
|
||
// | x y w |
|
||
// To specify the orientation (u, v, w) are restricted to (0, 0, 0x40000000).
|
||
// Reference: ISO/IEC 14496-12: 8.2.2.3.
|
||
int fixedOne = 65536;
|
||
switch (orientation) {
|
||
case 0:
|
||
return Util.toByteArray(fixedOne, 0, 0, 0, fixedOne, 0, 0, 0, 0x40000000);
|
||
case 90:
|
||
return Util.toByteArray(0, fixedOne, 0, -fixedOne, 0, 0, 0, 0, 0x40000000);
|
||
case 180:
|
||
return Util.toByteArray(-fixedOne, 0, 0, 0, -fixedOne, 0, 0, 0, 0x40000000);
|
||
case 270:
|
||
return Util.toByteArray(0, -fixedOne, 0, fixedOne, 0, 0, 0, 0, 0x40000000);
|
||
default:
|
||
throw new IllegalArgumentException("invalid orientation " + orientation);
|
||
}
|
||
}
|
||
|
||
/** Converts microseconds to video units, using the provided timebase. */
|
||
private static long vuFromUs(long timestampUs, long videoUnitTimebase) {
|
||
return Util.scaleLargeValue(
|
||
timestampUs, videoUnitTimebase, C.MICROS_PER_SECOND, RoundingMode.HALF_UP);
|
||
}
|
||
}
|