mirror of
https://github.com/samsonjs/media.git
synced 2026-03-27 09:45:47 +00:00
Add SilenceSkippingAudioProcessor
This uses a simple threshold-based algorithm for classifying audio frames as silent, and removes silences from input audio that last longer than a given duration. The plan is to expose this functionality via PlaybackParameters in a later change. Issue: #2635 ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=190737027
This commit is contained in:
parent
18df028ce2
commit
6dcfe57fd3
3 changed files with 838 additions and 3 deletions
|
|
@ -138,6 +138,7 @@ public final class DefaultAudioSink implements AudioSink {
|
|||
private final boolean enableConvertHighResIntPcmToFloat;
|
||||
private final ChannelMappingAudioProcessor channelMappingAudioProcessor;
|
||||
private final TrimmingAudioProcessor trimmingAudioProcessor;
|
||||
private final SilenceSkippingAudioProcessor silenceSkippingAudioProcessor;
|
||||
private final SonicAudioProcessor sonicAudioProcessor;
|
||||
private final AudioProcessor[] toIntPcmAvailableAudioProcessors;
|
||||
private final AudioProcessor[] toFloatPcmAvailableAudioProcessors;
|
||||
|
|
@ -226,14 +227,16 @@ public final class DefaultAudioSink implements AudioSink {
|
|||
audioTrackPositionTracker = new AudioTrackPositionTracker(new PositionTrackerListener());
|
||||
channelMappingAudioProcessor = new ChannelMappingAudioProcessor();
|
||||
trimmingAudioProcessor = new TrimmingAudioProcessor();
|
||||
silenceSkippingAudioProcessor = new SilenceSkippingAudioProcessor();
|
||||
sonicAudioProcessor = new SonicAudioProcessor();
|
||||
toIntPcmAvailableAudioProcessors = new AudioProcessor[4 + audioProcessors.length];
|
||||
toIntPcmAvailableAudioProcessors = new AudioProcessor[5 + audioProcessors.length];
|
||||
toIntPcmAvailableAudioProcessors[0] = new ResamplingAudioProcessor();
|
||||
toIntPcmAvailableAudioProcessors[1] = channelMappingAudioProcessor;
|
||||
toIntPcmAvailableAudioProcessors[2] = trimmingAudioProcessor;
|
||||
System.arraycopy(
|
||||
audioProcessors, 0, toIntPcmAvailableAudioProcessors, 3, audioProcessors.length);
|
||||
toIntPcmAvailableAudioProcessors[3 + audioProcessors.length] = sonicAudioProcessor;
|
||||
toIntPcmAvailableAudioProcessors[3 + audioProcessors.length] = silenceSkippingAudioProcessor;
|
||||
toIntPcmAvailableAudioProcessors[4 + audioProcessors.length] = sonicAudioProcessor;
|
||||
toFloatPcmAvailableAudioProcessors = new AudioProcessor[] {new FloatResamplingAudioProcessor()};
|
||||
volume = 1.0f;
|
||||
startMediaTimeState = START_NOT_SET;
|
||||
|
|
@ -272,7 +275,7 @@ public final class DefaultAudioSink implements AudioSink {
|
|||
}
|
||||
long positionUs = audioTrackPositionTracker.getCurrentPositionUs(sourceEnded);
|
||||
positionUs = Math.min(positionUs, framesToDurationUs(getWrittenFrames()));
|
||||
return startMediaTimeUs + applySpeedup(positionUs);
|
||||
return startMediaTimeUs + applySkipping(applySpeedup(positionUs));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -938,6 +941,10 @@ public final class DefaultAudioSink implements AudioSink {
|
|||
positionUs - playbackParametersPositionUs, playbackParameters.speed);
|
||||
}
|
||||
|
||||
// Converts a playback position into a media position by adding back the duration of the
// frames that the silence skipper has dropped so far.
private long applySkipping(long positionUs) {
|
||||
return positionUs + framesToDurationUs(silenceSkippingAudioProcessor.getSkippedFrames());
|
||||
}
|
||||
|
||||
// Returns whether an underlying AudioTrack is currently available (non-null).
private boolean isInitialized() {
|
||||
return audioTrack != null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,411 @@
|
|||
/*
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.google.android.exoplayer2.audio;
|
||||
|
||||
import android.support.annotation.IntDef;
|
||||
import com.google.android.exoplayer2.C;
|
||||
import com.google.android.exoplayer2.Format;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/**
|
||||
* An {@link AudioProcessor} that skips silence in the input stream. Input and output are 16-bit
|
||||
* PCM.
|
||||
*/
|
||||
/* package */ final class SilenceSkippingAudioProcessor implements AudioProcessor {
|
||||
|
||||
/**
|
||||
* The minimum duration of audio that must be below {@link #SILENCE_THRESHOLD_LEVEL} to classify
|
||||
* that part of audio as silent, in microseconds.
|
||||
*/
|
||||
private static final long MINIMUM_SILENCE_DURATION_US = 100_000;
|
||||
/**
|
||||
* The duration of silence by which to extend non-silent sections, in microseconds. The value must
|
||||
* not exceed {@link #MINIMUM_SILENCE_DURATION_US}.
|
||||
*/
|
||||
private static final long PADDING_SILENCE_US = 10_000;
|
||||
/**
|
||||
* The absolute level below which an individual PCM sample is classified as silent. Note: the
|
||||
* specified value will be rounded so that the threshold check only depends on the more
|
||||
* significant byte, for efficiency.
|
||||
*/
|
||||
private static final short SILENCE_THRESHOLD_LEVEL = 1024;
|
||||
|
||||
/**
|
||||
* Threshold for classifying an individual PCM sample as silent based on its more significant
|
||||
* byte. This is {@link #SILENCE_THRESHOLD_LEVEL} divided by 256 with rounding.
|
||||
*/
|
||||
private static final byte SILENCE_THRESHOLD_LEVEL_MSB = (SILENCE_THRESHOLD_LEVEL + 128) >> 8;
|
||||
|
||||
/** Trimming states. */
|
||||
@Retention(RetentionPolicy.SOURCE)
|
||||
@IntDef({
|
||||
STATE_NOISY,
|
||||
STATE_MAYBE_SILENT,
|
||||
STATE_SILENT,
|
||||
})
|
||||
private @interface State {}
|
||||
/** State when the input is not silent. */
|
||||
private static final int STATE_NOISY = 0;
|
||||
/** State when the input may be silent but we haven't read enough yet to know. */
|
||||
private static final int STATE_MAYBE_SILENT = 1;
|
||||
/** State when the input is silent. */
|
||||
private static final int STATE_SILENT = 2;
|
||||
|
||||
private int channelCount;
|
||||
private int sampleRateHz;
|
||||
private int bytesPerFrame;
|
||||
|
||||
private boolean enabled;
|
||||
private boolean isActive;
|
||||
|
||||
private ByteBuffer buffer;
|
||||
private ByteBuffer outputBuffer;
|
||||
private boolean inputEnded;
|
||||
|
||||
/**
|
||||
* Buffers audio data that may be classified as silence while in {@link #STATE_MAYBE_SILENT}. If
|
||||
* the input becomes noisy before the buffer has filled, it will be output. Otherwise, the buffer
|
||||
* contents will be dropped and the state will transition to {@link #STATE_SILENT}.
|
||||
*/
|
||||
private byte[] maybeSilenceBuffer;
|
||||
|
||||
/**
|
||||
* Stores the latest part of the input while silent. It will be output as padding if the next
|
||||
* input is noisy.
|
||||
*/
|
||||
private byte[] paddingBuffer;
|
||||
|
||||
private @State int state;
|
||||
private int maybeSilenceBufferSize;
|
||||
private int paddingSize;
|
||||
private boolean hasOutputNoise;
|
||||
private long skippedFrames;
|
||||
|
||||
/** Creates a new silence trimming audio processor. */
|
||||
public SilenceSkippingAudioProcessor() {
|
||||
buffer = EMPTY_BUFFER;
|
||||
outputBuffer = EMPTY_BUFFER;
|
||||
channelCount = Format.NO_VALUE;
|
||||
sampleRateHz = Format.NO_VALUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets whether to skip silence in the input. After calling this method, call {@link
|
||||
* #configure(int, int, int)} to apply the new setting.
|
||||
*
|
||||
* @param enabled Whether to skip silence in the input.
|
||||
*/
|
||||
public void setEnabled(boolean enabled) {
|
||||
this.enabled = enabled;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the total number of frames of input audio that were skipped due to being classified as
|
||||
* silence since the last call to {@link #flush()}.
|
||||
*/
|
||||
public long getSkippedFrames() {
|
||||
return skippedFrames;
|
||||
}
|
||||
|
||||
// AudioProcessor implementation.
|
||||
|
||||
@Override
|
||||
public boolean configure(int sampleRateHz, int channelCount, int encoding)
|
||||
throws UnhandledFormatException {
|
||||
if (encoding != C.ENCODING_PCM_16BIT) {
|
||||
throw new UnhandledFormatException(sampleRateHz, channelCount, encoding);
|
||||
}
|
||||
|
||||
boolean wasActive = isActive;
|
||||
isActive = enabled;
|
||||
if (!isActive) {
|
||||
return wasActive;
|
||||
}
|
||||
|
||||
if (wasActive && this.sampleRateHz == sampleRateHz && this.channelCount == channelCount) {
|
||||
return false;
|
||||
}
|
||||
|
||||
this.sampleRateHz = sampleRateHz;
|
||||
this.channelCount = channelCount;
|
||||
bytesPerFrame = channelCount * 2;
|
||||
int maybeSilenceBufferSize = durationUsToFrames(MINIMUM_SILENCE_DURATION_US) * bytesPerFrame;
|
||||
if (maybeSilenceBuffer == null || maybeSilenceBuffer.length != maybeSilenceBufferSize) {
|
||||
maybeSilenceBuffer = new byte[maybeSilenceBufferSize];
|
||||
}
|
||||
paddingSize = durationUsToFrames(PADDING_SILENCE_US) * bytesPerFrame;
|
||||
paddingBuffer = new byte[paddingSize];
|
||||
state = STATE_NOISY;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isActive() {
|
||||
return isActive;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getOutputChannelCount() {
|
||||
return channelCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public @C.Encoding int getOutputEncoding() {
|
||||
return C.ENCODING_PCM_16BIT;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getOutputSampleRateHz() {
|
||||
return sampleRateHz;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void queueInput(ByteBuffer inputBuffer) {
|
||||
while (inputBuffer.hasRemaining() && !outputBuffer.hasRemaining()) {
|
||||
switch (state) {
|
||||
case STATE_NOISY:
|
||||
processNoisy(inputBuffer);
|
||||
break;
|
||||
case STATE_MAYBE_SILENT:
|
||||
processMaybeSilence(inputBuffer);
|
||||
break;
|
||||
case STATE_SILENT:
|
||||
processSilence(inputBuffer);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void queueEndOfStream() {
|
||||
inputEnded = true;
|
||||
if (maybeSilenceBufferSize > 0) {
|
||||
// We haven't received enough silence to transition to the silent state, so output the buffer.
|
||||
output(maybeSilenceBuffer, maybeSilenceBufferSize);
|
||||
}
|
||||
if (!hasOutputNoise) {
|
||||
skippedFrames += paddingSize / bytesPerFrame;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ByteBuffer getOutput() {
|
||||
ByteBuffer outputBuffer = this.outputBuffer;
|
||||
this.outputBuffer = EMPTY_BUFFER;
|
||||
return outputBuffer;
|
||||
}
|
||||
|
||||
@SuppressWarnings("ReferenceEquality")
|
||||
@Override
|
||||
public boolean isEnded() {
|
||||
return inputEnded && outputBuffer == EMPTY_BUFFER;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void flush() {
|
||||
outputBuffer = EMPTY_BUFFER;
|
||||
inputEnded = false;
|
||||
skippedFrames = 0;
|
||||
maybeSilenceBufferSize = 0;
|
||||
hasOutputNoise = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
flush();
|
||||
buffer = EMPTY_BUFFER;
|
||||
channelCount = Format.NO_VALUE;
|
||||
sampleRateHz = Format.NO_VALUE;
|
||||
maybeSilenceBuffer = null;
|
||||
}
|
||||
|
||||
// Internal methods.
|
||||
|
||||
/**
|
||||
* Incrementally processes new input from {@code inputBuffer} while in {@link #STATE_NOISY},
|
||||
* updating the state if needed.
|
||||
*/
|
||||
private void processNoisy(ByteBuffer inputBuffer) {
|
||||
int limit = inputBuffer.limit();
|
||||
|
||||
// Check if there's any noise within the maybe silence buffer duration.
|
||||
inputBuffer.limit(Math.min(limit, inputBuffer.position() + maybeSilenceBuffer.length));
|
||||
int noiseLimit = findNoiseLimit(inputBuffer);
|
||||
if (noiseLimit == inputBuffer.position()) {
|
||||
// The buffer contains the start of possible silence.
|
||||
state = STATE_MAYBE_SILENT;
|
||||
} else {
|
||||
inputBuffer.limit(noiseLimit);
|
||||
output(inputBuffer);
|
||||
}
|
||||
|
||||
// Restore the limit.
|
||||
inputBuffer.limit(limit);
|
||||
}
|
||||
|
||||
/**
|
||||
* Incrementally processes new input from {@code inputBuffer} while in {@link
|
||||
* #STATE_MAYBE_SILENT}, updating the state if needed.
|
||||
*/
|
||||
private void processMaybeSilence(ByteBuffer inputBuffer) {
|
||||
int limit = inputBuffer.limit();
|
||||
int noisePosition = findNoisePosition(inputBuffer);
|
||||
int maybeSilenceInputSize = noisePosition - inputBuffer.position();
|
||||
int maybeSilenceBufferRemaining = maybeSilenceBuffer.length - maybeSilenceBufferSize;
|
||||
if (noisePosition < limit && maybeSilenceInputSize < maybeSilenceBufferRemaining) {
|
||||
// The maybe silence buffer isn't full, so output it and switch back to the noisy state.
|
||||
output(maybeSilenceBuffer, maybeSilenceBufferSize);
|
||||
maybeSilenceBufferSize = 0;
|
||||
state = STATE_NOISY;
|
||||
} else {
|
||||
// Fill as much of the maybe silence buffer as possible.
|
||||
int bytesToWrite = Math.min(maybeSilenceInputSize, maybeSilenceBufferRemaining);
|
||||
inputBuffer.limit(inputBuffer.position() + bytesToWrite);
|
||||
inputBuffer.get(maybeSilenceBuffer, maybeSilenceBufferSize, bytesToWrite);
|
||||
maybeSilenceBufferSize += bytesToWrite;
|
||||
if (maybeSilenceBufferSize == maybeSilenceBuffer.length) {
|
||||
// We've reached a period of silence, so skip it, taking in to account padding for both
|
||||
// the noisy to silent transition and any future silent to noisy transition.
|
||||
if (hasOutputNoise) {
|
||||
output(maybeSilenceBuffer, paddingSize);
|
||||
skippedFrames += (maybeSilenceBufferSize - paddingSize * 2) / bytesPerFrame;
|
||||
} else {
|
||||
skippedFrames += (maybeSilenceBufferSize - paddingSize) / bytesPerFrame;
|
||||
}
|
||||
updatePaddingBuffer(inputBuffer, maybeSilenceBuffer, maybeSilenceBufferSize);
|
||||
maybeSilenceBufferSize = 0;
|
||||
state = STATE_SILENT;
|
||||
}
|
||||
|
||||
// Restore the limit.
|
||||
inputBuffer.limit(limit);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Incrementally processes new input from {@code inputBuffer} while in {@link #STATE_SILENT},
|
||||
* updating the state if needed.
|
||||
*/
|
||||
private void processSilence(ByteBuffer inputBuffer) {
|
||||
int limit = inputBuffer.limit();
|
||||
int noisyPosition = findNoisePosition(inputBuffer);
|
||||
inputBuffer.limit(noisyPosition);
|
||||
skippedFrames += inputBuffer.remaining() / bytesPerFrame;
|
||||
updatePaddingBuffer(inputBuffer, paddingBuffer, paddingSize);
|
||||
if (noisyPosition < limit) {
|
||||
// Output the padding, which may include previous input as well as new input, then transition
|
||||
// back to the noisy state.
|
||||
output(paddingBuffer, paddingSize);
|
||||
state = STATE_NOISY;
|
||||
|
||||
// Restore the limit.
|
||||
inputBuffer.limit(limit);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies {@code length} elements from {@code data} to populate a new output buffer from the
|
||||
* processor.
|
||||
*/
|
||||
private void output(byte[] data, int length) {
|
||||
prepareForOutput(length);
|
||||
buffer.put(data, 0, length);
|
||||
buffer.flip();
|
||||
outputBuffer = buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies remaining bytes from {@code data} to populate a new output buffer from the processor.
|
||||
*/
|
||||
private void output(ByteBuffer data) {
|
||||
prepareForOutput(data.remaining());
|
||||
buffer.put(data);
|
||||
buffer.flip();
|
||||
outputBuffer = buffer;
|
||||
}
|
||||
|
||||
/** Prepares to output {@code size} bytes in {@code buffer}. */
|
||||
private void prepareForOutput(int size) {
|
||||
if (buffer.capacity() < size) {
|
||||
buffer = ByteBuffer.allocateDirect(size).order(ByteOrder.nativeOrder());
|
||||
} else {
|
||||
buffer.clear();
|
||||
}
|
||||
if (size > 0) {
|
||||
hasOutputNoise = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills {@link #paddingBuffer} using data from {@code input}, plus any additional buffered data
|
||||
* at the end of {@code buffer} (up to its {@code size}) required to fill it, advancing the input
|
||||
* position.
|
||||
*/
|
||||
private void updatePaddingBuffer(ByteBuffer input, byte[] buffer, int size) {
|
||||
int fromInputSize = Math.min(input.remaining(), paddingSize);
|
||||
int fromBufferSize = paddingSize - fromInputSize;
|
||||
System.arraycopy(
|
||||
/* src= */ buffer,
|
||||
/* srcPos= */ size - fromBufferSize,
|
||||
/* dest= */ paddingBuffer,
|
||||
/* destPos= */ 0,
|
||||
/* length= */ fromBufferSize);
|
||||
input.position(input.limit() - fromInputSize);
|
||||
input.get(paddingBuffer, fromBufferSize, fromInputSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of input frames corresponding to {@code durationUs} microseconds of audio.
|
||||
*/
|
||||
private int durationUsToFrames(long durationUs) {
|
||||
return (int) ((durationUs * sampleRateHz) / C.MICROS_PER_SECOND);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the earliest byte position in [position, limit) of {@code buffer} that contains a frame
|
||||
* classified as a noisy frame, or the limit of the buffer if no such frame exists.
|
||||
*/
|
||||
private int findNoisePosition(ByteBuffer buffer) {
|
||||
// The input is in ByteOrder.nativeOrder(), which is little endian on Android.
|
||||
for (int i = buffer.position() + 1; i < buffer.limit(); i += 2) {
|
||||
if (Math.abs(buffer.get(i)) > SILENCE_THRESHOLD_LEVEL_MSB) {
|
||||
// Round to the start of the frame.
|
||||
return bytesPerFrame * (i / bytesPerFrame);
|
||||
}
|
||||
}
|
||||
return buffer.limit();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the earliest byte position in [position, limit) of {@code buffer} such that all frames
|
||||
* from the byte position to the limit are classified as silent.
|
||||
*/
|
||||
private int findNoiseLimit(ByteBuffer buffer) {
|
||||
// The input is in ByteOrder.nativeOrder(), which is little endian on Android.
|
||||
for (int i = buffer.limit() - 1; i >= buffer.position(); i -= 2) {
|
||||
if (Math.abs(buffer.get(i)) > SILENCE_THRESHOLD_LEVEL_MSB) {
|
||||
// Return the start of the next frame.
|
||||
return bytesPerFrame * (i / bytesPerFrame) + bytesPerFrame;
|
||||
}
|
||||
}
|
||||
return buffer.position();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,417 @@
|
|||
/*
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.google.android.exoplayer2.audio;
|
||||
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
|
||||
import com.google.android.exoplayer2.C;
|
||||
import com.google.android.exoplayer2.audio.AudioProcessor.UnhandledFormatException;
|
||||
import com.google.android.exoplayer2.util.Assertions;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ShortBuffer;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.robolectric.RobolectricTestRunner;
|
||||
|
||||
/** Unit tests for {@link SilenceSkippingAudioProcessor}. */
|
||||
@RunWith(RobolectricTestRunner.class)
|
||||
public final class SilenceSkippingAudioProcessorTest {
|
||||
|
||||
private static final int TEST_SIGNAL_SAMPLE_RATE_HZ = 1000;
|
||||
private static final int TEST_SIGNAL_CHANNEL_COUNT = 2;
|
||||
private static final int TEST_SIGNAL_SILENCE_DURATION_MS = 1000;
|
||||
private static final int TEST_SIGNAL_NOISE_DURATION_MS = 1000;
|
||||
private static final int TEST_SIGNAL_FRAME_COUNT = 100000;
|
||||
|
||||
private static final int INPUT_BUFFER_SIZE = 100;
|
||||
|
||||
private SilenceSkippingAudioProcessor silenceSkippingAudioProcessor;
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
silenceSkippingAudioProcessor = new SilenceSkippingAudioProcessor();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEnabledProcessor_isActive() throws Exception {
|
||||
// Given an enabled processor.
|
||||
silenceSkippingAudioProcessor.setEnabled(true);
|
||||
|
||||
// When configuring it.
|
||||
boolean reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
|
||||
// It's active.
|
||||
assertThat(reconfigured).isTrue();
|
||||
assertThat(silenceSkippingAudioProcessor.isActive()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDisabledProcessor_isNotActive() throws Exception {
|
||||
// Given a disabled processor.
|
||||
silenceSkippingAudioProcessor.setEnabled(false);
|
||||
|
||||
// When configuring it.
|
||||
boolean reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
|
||||
// It's not active.
|
||||
assertThat(reconfigured).isFalse();
|
||||
assertThat(silenceSkippingAudioProcessor.isActive()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefaultProcessor_isNotEnabled() throws Exception {
|
||||
// Given a processor in its default state.
|
||||
// When reconfigured.
|
||||
boolean reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
|
||||
// It's not active.
|
||||
assertThat(reconfigured).isFalse();
|
||||
assertThat(silenceSkippingAudioProcessor.isActive()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testChangingSampleRate_requiresReconfiguration() throws Exception {
|
||||
// Given an enabled processor and configured processor.
|
||||
silenceSkippingAudioProcessor.setEnabled(true);
|
||||
boolean reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
assertThat(reconfigured).isTrue();
|
||||
|
||||
// When reconfiguring it with a different sample rate.
|
||||
reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ * 2, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
|
||||
// It's reconfigured.
|
||||
assertThat(reconfigured).isTrue();
|
||||
assertThat(silenceSkippingAudioProcessor.isActive()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReconfiguringWithSameSampleRate_doesNotRequireReconfiguration() throws Exception {
|
||||
// Given an enabled processor and configured processor.
|
||||
silenceSkippingAudioProcessor.setEnabled(true);
|
||||
boolean reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
assertThat(reconfigured).isTrue();
|
||||
|
||||
// When reconfiguring it with the same sample rate.
|
||||
reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
|
||||
// It's not reconfigured but it is active.
|
||||
assertThat(reconfigured).isFalse();
|
||||
assertThat(silenceSkippingAudioProcessor.isActive()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipInSilentSignal_skipsEverything() throws Exception {
|
||||
// Given a signal with only noise.
|
||||
InputBufferProvider inputBufferProvider =
|
||||
getInputBufferProviderForAlternatingSilenceAndNoise(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ,
|
||||
TEST_SIGNAL_CHANNEL_COUNT,
|
||||
TEST_SIGNAL_SILENCE_DURATION_MS,
|
||||
/* noiseDurationMs= */ 0,
|
||||
TEST_SIGNAL_FRAME_COUNT);
|
||||
|
||||
// When processing the entire signal.
|
||||
silenceSkippingAudioProcessor.setEnabled(true);
|
||||
boolean reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
assertThat(reconfigured).isTrue();
|
||||
assertThat(silenceSkippingAudioProcessor.isActive()).isTrue();
|
||||
long totalOutputFrames =
|
||||
process(silenceSkippingAudioProcessor, inputBufferProvider, INPUT_BUFFER_SIZE);
|
||||
|
||||
// The entire signal is skipped.
|
||||
assertThat(totalOutputFrames).isEqualTo(0);
|
||||
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(TEST_SIGNAL_FRAME_COUNT);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipInNoisySignal_skipsNothing() throws Exception {
|
||||
// Given a signal with only silence.
|
||||
InputBufferProvider inputBufferProvider =
|
||||
getInputBufferProviderForAlternatingSilenceAndNoise(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ,
|
||||
TEST_SIGNAL_CHANNEL_COUNT,
|
||||
/* silenceDurationMs= */ 0,
|
||||
TEST_SIGNAL_NOISE_DURATION_MS,
|
||||
TEST_SIGNAL_FRAME_COUNT);
|
||||
|
||||
// When processing the entire signal.
|
||||
SilenceSkippingAudioProcessor silenceSkippingAudioProcessor =
|
||||
new SilenceSkippingAudioProcessor();
|
||||
silenceSkippingAudioProcessor.setEnabled(true);
|
||||
boolean reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
assertThat(reconfigured).isTrue();
|
||||
assertThat(silenceSkippingAudioProcessor.isActive()).isTrue();
|
||||
long totalOutputFrames =
|
||||
process(silenceSkippingAudioProcessor, inputBufferProvider, INPUT_BUFFER_SIZE);
|
||||
|
||||
// None of the signal is skipped.
|
||||
assertThat(totalOutputFrames).isEqualTo(TEST_SIGNAL_FRAME_COUNT);
|
||||
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipInAlternatingTestSignal_hasCorrectOutputAndSkippedFrameCounts()
|
||||
throws Exception {
|
||||
// Given a signal that alternates between silence and noise.
|
||||
InputBufferProvider inputBufferProvider =
|
||||
getInputBufferProviderForAlternatingSilenceAndNoise(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ,
|
||||
TEST_SIGNAL_CHANNEL_COUNT,
|
||||
TEST_SIGNAL_SILENCE_DURATION_MS,
|
||||
TEST_SIGNAL_NOISE_DURATION_MS,
|
||||
TEST_SIGNAL_FRAME_COUNT);
|
||||
|
||||
// When processing the entire signal.
|
||||
SilenceSkippingAudioProcessor silenceSkippingAudioProcessor =
|
||||
new SilenceSkippingAudioProcessor();
|
||||
silenceSkippingAudioProcessor.setEnabled(true);
|
||||
boolean reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
assertThat(reconfigured).isTrue();
|
||||
assertThat(silenceSkippingAudioProcessor.isActive()).isTrue();
|
||||
long totalOutputFrames =
|
||||
process(silenceSkippingAudioProcessor, inputBufferProvider, INPUT_BUFFER_SIZE);
|
||||
|
||||
// The right number of frames are skipped/output.
|
||||
assertThat(totalOutputFrames).isEqualTo(53990);
|
||||
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(46010);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipWithSmallerInputBufferSize_hasCorrectOutputAndSkippedFrameCounts()
|
||||
throws Exception {
|
||||
// Given a signal that alternates between silence and noise.
|
||||
InputBufferProvider inputBufferProvider =
|
||||
getInputBufferProviderForAlternatingSilenceAndNoise(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ,
|
||||
TEST_SIGNAL_CHANNEL_COUNT,
|
||||
TEST_SIGNAL_SILENCE_DURATION_MS,
|
||||
TEST_SIGNAL_NOISE_DURATION_MS,
|
||||
TEST_SIGNAL_FRAME_COUNT);
|
||||
|
||||
// When processing the entire signal with a smaller input buffer size.
|
||||
SilenceSkippingAudioProcessor silenceSkippingAudioProcessor =
|
||||
new SilenceSkippingAudioProcessor();
|
||||
silenceSkippingAudioProcessor.setEnabled(true);
|
||||
boolean reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
assertThat(reconfigured).isTrue();
|
||||
assertThat(silenceSkippingAudioProcessor.isActive()).isTrue();
|
||||
long totalOutputFrames =
|
||||
process(silenceSkippingAudioProcessor, inputBufferProvider, /* inputBufferSize= */ 80);
|
||||
|
||||
// The right number of frames are skipped/output.
|
||||
assertThat(totalOutputFrames).isEqualTo(53990);
|
||||
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(46010);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipWithLargerInputBufferSize_hasCorrectOutputAndSkippedFrameCounts()
|
||||
throws Exception {
|
||||
// Given a signal that alternates between silence and noise.
|
||||
InputBufferProvider inputBufferProvider =
|
||||
getInputBufferProviderForAlternatingSilenceAndNoise(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ,
|
||||
TEST_SIGNAL_CHANNEL_COUNT,
|
||||
TEST_SIGNAL_SILENCE_DURATION_MS,
|
||||
TEST_SIGNAL_NOISE_DURATION_MS,
|
||||
TEST_SIGNAL_FRAME_COUNT);
|
||||
|
||||
// When processing the entire signal with a larger input buffer size.
|
||||
SilenceSkippingAudioProcessor silenceSkippingAudioProcessor =
|
||||
new SilenceSkippingAudioProcessor();
|
||||
silenceSkippingAudioProcessor.setEnabled(true);
|
||||
boolean reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
assertThat(reconfigured).isTrue();
|
||||
assertThat(silenceSkippingAudioProcessor.isActive()).isTrue();
|
||||
long totalOutputFrames =
|
||||
process(silenceSkippingAudioProcessor, inputBufferProvider, /* inputBufferSize= */ 120);
|
||||
|
||||
// The right number of frames are skipped/output.
|
||||
assertThat(totalOutputFrames).isEqualTo(53990);
|
||||
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(46010);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipThenFlush_resetsSkippedFrameCount() throws Exception {
|
||||
// Given a signal that alternates between silence and noise.
|
||||
InputBufferProvider inputBufferProvider =
|
||||
getInputBufferProviderForAlternatingSilenceAndNoise(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ,
|
||||
TEST_SIGNAL_CHANNEL_COUNT,
|
||||
TEST_SIGNAL_SILENCE_DURATION_MS,
|
||||
TEST_SIGNAL_NOISE_DURATION_MS,
|
||||
TEST_SIGNAL_FRAME_COUNT);
|
||||
|
||||
// When processing the entire signal then flushing.
|
||||
SilenceSkippingAudioProcessor silenceSkippingAudioProcessor =
|
||||
new SilenceSkippingAudioProcessor();
|
||||
silenceSkippingAudioProcessor.setEnabled(true);
|
||||
boolean reconfigured =
|
||||
silenceSkippingAudioProcessor.configure(
|
||||
TEST_SIGNAL_SAMPLE_RATE_HZ, TEST_SIGNAL_CHANNEL_COUNT, C.ENCODING_PCM_16BIT);
|
||||
assertThat(reconfigured).isTrue();
|
||||
assertThat(silenceSkippingAudioProcessor.isActive()).isTrue();
|
||||
process(silenceSkippingAudioProcessor, inputBufferProvider, INPUT_BUFFER_SIZE);
|
||||
silenceSkippingAudioProcessor.flush();
|
||||
|
||||
// The skipped frame count is zero.
|
||||
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes the entire stream provided by {@code inputBufferProvider} in chunks of {@code
|
||||
* inputBufferSize} and returns the total number of output frames.
|
||||
*/
|
||||
private static long process(
|
||||
SilenceSkippingAudioProcessor processor,
|
||||
InputBufferProvider inputBufferProvider,
|
||||
int inputBufferSize)
|
||||
throws UnhandledFormatException {
|
||||
long totalOutputFrames = 0;
|
||||
while (inputBufferProvider.hasRemaining()) {
|
||||
ByteBuffer inputBuffer = inputBufferProvider.getNextInputBuffer(inputBufferSize);
|
||||
while (inputBuffer.hasRemaining()) {
|
||||
processor.queueInput(inputBuffer);
|
||||
ByteBuffer outputBuffer = processor.getOutput();
|
||||
totalOutputFrames += outputBuffer.remaining() / (2 * processor.getOutputChannelCount());
|
||||
outputBuffer.clear();
|
||||
}
|
||||
}
|
||||
processor.queueEndOfStream();
|
||||
while (!processor.isEnded()) {
|
||||
ByteBuffer outputBuffer = processor.getOutput();
|
||||
totalOutputFrames += outputBuffer.remaining() / (2 * processor.getOutputChannelCount());
|
||||
outputBuffer.clear();
|
||||
}
|
||||
return totalOutputFrames;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an {@link InputBufferProvider} that provides input buffers for a stream that alternates
|
||||
* between silence/noise of the specified durations to fill {@code totalFrameCount}.
|
||||
*/
|
||||
private static InputBufferProvider getInputBufferProviderForAlternatingSilenceAndNoise(
|
||||
int sampleRateHz,
|
||||
int channelCount,
|
||||
int silenceDurationMs,
|
||||
int noiseDurationMs,
|
||||
int totalFrameCount) {
|
||||
Pcm16BitAudioBuilder audioBuilder = new Pcm16BitAudioBuilder(channelCount, totalFrameCount);
|
||||
while (!audioBuilder.isFull()) {
|
||||
int silenceDurationFrames = (silenceDurationMs * sampleRateHz) / 1000;
|
||||
audioBuilder.appendFrames(/* count= */ silenceDurationFrames, /* channelLevels= */ (short) 0);
|
||||
int noiseDurationFrames = (noiseDurationMs * sampleRateHz) / 1000;
|
||||
audioBuilder.appendFrames(
|
||||
/* count= */ noiseDurationFrames, /* channelLevels= */ Short.MAX_VALUE);
|
||||
}
|
||||
return new InputBufferProvider(audioBuilder.build());
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps a {@link ShortBuffer} and provides a sequence of {@link ByteBuffer}s of specified sizes
|
||||
* that contain copies of its data.
|
||||
*/
|
||||
private static final class InputBufferProvider {
|
||||
|
||||
private final ShortBuffer buffer;
|
||||
|
||||
public InputBufferProvider(ShortBuffer buffer) {
|
||||
this.buffer = buffer;
|
||||
}
|
||||
|
||||
/** Returns the next buffer with size up to {@code sizeBytes}. */
|
||||
public ByteBuffer getNextInputBuffer(int sizeBytes) {
|
||||
ByteBuffer inputBuffer = ByteBuffer.allocate(sizeBytes).order(ByteOrder.nativeOrder());
|
||||
ShortBuffer inputBufferAsShortBuffer = inputBuffer.asShortBuffer();
|
||||
int limit = buffer.limit();
|
||||
buffer.limit(Math.min(buffer.position() + sizeBytes / 2, limit));
|
||||
inputBufferAsShortBuffer.put(buffer);
|
||||
buffer.limit(limit);
|
||||
inputBuffer.limit(inputBufferAsShortBuffer.position() * 2);
|
||||
return inputBuffer;
|
||||
}
|
||||
|
||||
/** Returns whether any more input can be provided via {@link #getNextInputBuffer(int)}. */
|
||||
public boolean hasRemaining() {
|
||||
return buffer.hasRemaining();
|
||||
}
|
||||
}
|
||||
|
||||
  /** Builder for {@link ShortBuffer}s that contain 16-bit PCM audio samples. */
  private static final class Pcm16BitAudioBuilder {

    // Number of interleaved channels in each audio frame.
    private final int channelCount;
    // Destination for the samples; capacity is frameCount * channelCount 16-bit samples.
    private final ShortBuffer buffer;

    // Set once build() is called; append/isFull/build then fail their state checks.
    private boolean built;

    public Pcm16BitAudioBuilder(int channelCount, int frameCount) {
      this.channelCount = channelCount;
      // Two bytes per 16-bit sample.
      buffer = ByteBuffer.allocate(frameCount * channelCount * 2).asShortBuffer();
    }

    /**
     * Appends {@code count} audio frames, using the specified {@code channelLevels} in each frame.
     *
     * <p>NOTE(review): each loop iteration writes one frame's worth of samples
     * ({@code channelLevels.length} shorts), yet the index advances by {@code channelCount}, so
     * for {@code channelCount > 1} this appears to append only {@code count / channelCount}
     * frames. Harmless for mono test signals — confirm intent before using multi-channel counts.
     */
    public void appendFrames(int count, short... channelLevels) {
      Assertions.checkState(!built);
      for (int i = 0; i < count; i += channelCount) {
        for (int j = 0; j < channelLevels.length; j++) {
          buffer.put(channelLevels[j]);
        }
      }
    }

    /** Returns whether the buffer is full. */
    public boolean isFull() {
      Assertions.checkState(!built);
      return !buffer.hasRemaining();
    }

    /** Returns the built buffer. After calling this method the builder should not be reused. */
    public ShortBuffer build() {
      Assertions.checkState(!built);
      built = true;
      // Flip so consumers read from the start of the written data.
      buffer.flip();
      return buffer;
    }
  }
|
||||
}
|
||||
Loading…
Reference in a new issue