Echo Cancellation

This commit is contained in:
Marvin W 2021-05-01 15:19:05 +02:00
parent 6b976cdb66
commit 23ffd37dde
No known key found for this signature in database
GPG key ID: 072E9235DB996F2A
8 changed files with 385 additions and 19 deletions

View file

@ -2,11 +2,11 @@ cmake_minimum_required(VERSION 3.3)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
include(ComputeVersion)
if (NOT VERSION_FOUND)
project(Dino LANGUAGES C)
project(Dino LANGUAGES C CXX)
elseif (VERSION_IS_RELEASE)
project(Dino VERSION ${VERSION_FULL} LANGUAGES C)
project(Dino VERSION ${VERSION_FULL} LANGUAGES C CXX)
else ()
project(Dino LANGUAGES C)
project(Dino LANGUAGES C CXX)
set(PROJECT_VERSION ${VERSION_FULL})
endif ()

14
cmake/FindGstAudio.cmake Normal file
View file

@ -0,0 +1,14 @@
# Find module for the GStreamer audio library (pkg-config name
# gstreamer-audio-1.0), resolved through the PkgConfigWithFallback helper.
# NOTE(review): the helper is defined outside this file; presumably it tries
# pkg-config first and falls back to the LIB_NAMES / INCLUDE_NAMES hints below.
include(PkgConfigWithFallback)
find_pkg_config_with_fallback(GstAudio
PKG_CONFIG_NAME gstreamer-audio-1.0
LIB_NAMES gstaudio
LIB_DIR_HINTS gstreamer-1.0
INCLUDE_NAMES gst/audio/audio.h
INCLUDE_DIR_SUFFIXES gstreamer-1.0 gstreamer-1.0/include gstreamer-audio-1.0 gstreamer-audio-1.0/include
DEPENDS Gst
)
# Report the result through the standard CMake mechanism: GstAudio_LIBRARY must
# be set for the package to count as found, and GstAudio_VERSION is the value
# used for version checks.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(GstAudio
REQUIRED_VARS GstAudio_LIBRARY
VERSION_VAR GstAudio_VERSION)

View file

@ -0,0 +1,12 @@
# Find module for the WebRTC audio processing library (pkg-config name
# webrtc-audio-processing), resolved through the PkgConfigWithFallback helper.
# NOTE(review): the helper is defined outside this file; presumably it tries
# pkg-config first and falls back to the LIB_NAMES / INCLUDE_NAMES hints below.
include(PkgConfigWithFallback)
find_pkg_config_with_fallback(WebRTCAudioProcessing
PKG_CONFIG_NAME webrtc-audio-processing
LIB_NAMES webrtc_audio_processing
INCLUDE_NAMES webrtc/modules/audio_processing/include/audio_processing.h
INCLUDE_DIR_SUFFIXES webrtc-audio-processing webrtc_audio_processing
)
# Report the result through the standard CMake mechanism:
# WebRTCAudioProcessing_LIBRARY must be set for the package to count as found,
# and WebRTCAudioProcessing_VERSION is the value used for version checks.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(WebRTCAudioProcessing
REQUIRED_VARS WebRTCAudioProcessing_LIBRARY
VERSION_VAR WebRTCAudioProcessing_VERSION)

View file

@ -1,4 +1,5 @@
find_package(GstRtp REQUIRED)
find_package(WebRTCAudioProcessing 0.2)
find_packages(RTP_PACKAGES REQUIRED
Gee
GLib
@ -8,12 +9,26 @@ find_packages(RTP_PACKAGES REQUIRED
GTK3
Gst
GstApp
GstAudio
)
if(Gst_VERSION VERSION_GREATER "1.16")
set(RTP_DEFINITIONS GST_1_16)
endif()
# Only releases below 0.4 of webrtc-audio-processing are supported, so drop the
# package again when a newer one was picked up.
# A version comparison is required here: plain GREATER compares numbers, and a
# dotted version such as "0.4.1" is not a valid number, so it would never match.
# "NOT ... VERSION_LESS" is used instead of VERSION_GREATER_EQUAL to stay
# compatible with older CMake releases and to reject 0.4 itself, as the
# warning text states.
if(WebRTCAudioProcessing_FOUND AND NOT WebRTCAudioProcessing_VERSION VERSION_LESS "0.4")
    message(WARNING "Ignoring WebRTCAudioProcessing, only versions < 0.4 supported so far")
    unset(WebRTCAudioProcessing_FOUND)
endif()
if(WebRTCAudioProcessing_FOUND)
set(RTP_DEFINITIONS ${RTP_DEFINITIONS} WITH_VOICE_PROCESSOR)
set(RTP_VOICE_PROCESSOR_VALA src/voice_processor.vala)
set(RTP_VOICE_PROCESSOR_CXX src/voice_processor_native.cpp)
else()
message(WARNING "WebRTCAudioProcessing not found, build without voice pre-processing!")
endif()
vala_precompile(RTP_VALA_C
SOURCES
src/codec_util.vala
@ -23,6 +38,7 @@ SOURCES
src/stream.vala
src/video_widget.vala
src/register_plugin.vala
${RTP_VOICE_PROCESSOR_VALA}
CUSTOM_VAPIS
${CMAKE_BINARY_DIR}/exports/crypto-vala.vapi
${CMAKE_BINARY_DIR}/exports/xmpp-vala.vapi
@ -36,8 +52,8 @@ DEFINITIONS
)
add_definitions(${VALA_CFLAGS} -DG_LOG_DOMAIN="rtp" -I${CMAKE_CURRENT_SOURCE_DIR}/src)
add_library(rtp SHARED ${RTP_VALA_C})
target_link_libraries(rtp libdino crypto-vala ${RTP_PACKAGES} gstreamer-rtp-1.0)
add_library(rtp SHARED ${RTP_VALA_C} ${RTP_VOICE_PROCESSOR_CXX})
target_link_libraries(rtp libdino crypto-vala ${RTP_PACKAGES} gstreamer-rtp-1.0 webrtc-audio-processing)
set_target_properties(rtp PROPERTIES PREFIX "")
set_target_properties(rtp PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/plugins/)

View file

@ -37,6 +37,7 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
private Gst.Element dsp;
private Gst.Element mixer;
private Gst.Element filter;
private Gst.Element rate;
private int links = 0;
public Device(Plugin plugin, Gst.Device device) {
@ -132,13 +133,11 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
pipe.add(filter);
element.link(filter);
if (media == "audio" && plugin.echoprobe != null) {
dsp = Gst.ElementFactory.make("webrtcdsp", @"dsp_$id");
if (dsp != null) {
dsp.@set("probe", plugin.echoprobe.name);
dsp = new VoiceProcessor(plugin.echoprobe, element as Gst.Audio.StreamVolume);
dsp.name = @"dsp_$id";
pipe.add(dsp);
filter.link(dsp);
}
}
tee = Gst.ElementFactory.make("tee", @"tee_$id");
tee.@set("allow-not-linked", true);
pipe.add(tee);
@ -153,7 +152,11 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
filter.@set("caps", get_best_caps());
pipe.add(filter);
if (plugin.echoprobe != null) {
filter.link(plugin.echoprobe);
rate = Gst.ElementFactory.make("audiorate", @"rate_$id");
rate.@set("tolerance", 100000000);
pipe.add(rate);
filter.link(rate);
rate.link(plugin.echoprobe);
plugin.echoprobe.link(element);
} else {
filter.link(element);
@ -184,14 +187,17 @@ public class Dino.Plugins.Rtp.Device : MediaDevice, Object {
if (filter != null) {
filter.set_locked_state(true);
filter.set_state(Gst.State.NULL);
if (plugin.echoprobe != null) {
filter.unlink(plugin.echoprobe);
} else {
filter.unlink(element);
}
filter.unlink(rate ?? ((Gst.Element)plugin.echoprobe) ?? element);
pipe.remove(filter);
filter = null;
}
if (rate != null) {
rate.set_locked_state(true);
rate.set_state(Gst.State.NULL);
rate.unlink(plugin.echoprobe);
pipe.remove(rate);
rate = null;
}
if (plugin.echoprobe != null) {
plugin.echoprobe.unlink(element);
}

View file

@ -8,7 +8,7 @@ public class Dino.Plugins.Rtp.Plugin : RootInterface, VideoCallPlugin, Object {
public Gst.DeviceMonitor device_monitor { get; private set; }
public Gst.Pipeline pipe { get; private set; }
public Gst.Bin rtpbin { get; private set; }
public Gst.Element echoprobe { get; private set; }
public EchoProbe echoprobe { get; private set; }
private Gee.List<Stream> streams = new ArrayList<Stream>();
private Gee.List<Device> devices = new ArrayList<Device>();
@ -72,7 +72,8 @@ public class Dino.Plugins.Rtp.Plugin : RootInterface, VideoCallPlugin, Object {
pipe.add(rtpbin);
// Audio echo probe
echoprobe = Gst.ElementFactory.make("webrtcechoprobe", "echo-probe");
// echoprobe = Gst.ElementFactory.make("webrtcechoprobe", "echo-probe");
echoprobe = new EchoProbe();
if (echoprobe != null) pipe.add(echoprobe);
// Pipeline

View file

@ -0,0 +1,176 @@
using Gst;
namespace Dino.Plugins.Rtp {
// Binding for the native helper in voice_processor_native.cpp: returns a copy of
// `buf` whose PTS has been converted to running time using the transform's segment.
public static extern Buffer adjust_to_running_time(Base.Transform transform, Buffer buf);
}
// Pass-through audio filter placed on the playback path. It leaves the stream
// untouched and re-emits the played audio in fixed-size chunks through the
// on_new_buffer signal so that a listener can use it as the echo reference.
public class Dino.Plugins.Rtp.EchoProbe : Audio.Filter {
// Both pads only accept 48 kHz, mono, interleaved S16LE raw audio.
private static StaticPadTemplate sink_template = {"sink", PadDirection.SINK, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
private static StaticPadTemplate src_template = {"src", PadDirection.SRC, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
// Audio format most recently passed to setup().
public Audio.Info audio_info { get; private set; }
// Emitted from transform_ip() with one chunk of period_size bytes of playback audio.
public signal void on_new_buffer(Buffer buffer);
// Samples per chunk and the corresponding size in bytes; both are computed in setup().
private uint period_samples;
private uint period_size;
// Accumulates incoming buffers so they can be re-cut into period_size chunks.
private Base.Adapter adapter = new Base.Adapter();
static construct {
add_static_pad_template(sink_template);
add_static_pad_template(src_template);
set_static_metadata("Acoustic Echo Canceller probe", "Generic/Audio", "Gathers playback buffers for echo cancellation", "Dino Team <contact@dino.im>");
}
construct {
// The probe only observes the stream; buffers are forwarded unmodified.
set_passthrough(true);
}
// Stores the format and derives the chunk size from it.
public override bool setup(Audio.Info info) {
audio_info = info;
period_samples = info.rate / 100; // 10ms buffers
period_size = period_samples * info.bpf;
return true;
}
// Queues a running-time-stamped copy of `buf` and emits as many chunks as are ready.
public override FlowReturn transform_ip(Buffer buf) {
lock (adapter) {
adapter.push(adjust_to_running_time(this, buf));
// Strictly-greater comparison: a chunk is emitted only while more than one
// period is queued, so up to one full period stays in the adapter.
while (adapter.available() > period_size) {
on_new_buffer(adapter.take_buffer(period_size));
}
}
return FlowReturn.OK;
}
// Drops any audio that is still queued.
// NOTE(review): unlike transform_ip() this does not take the adapter lock — confirm
// that no streaming thread can still be running when stop() is called.
public override bool stop() {
adapter.clear();
return true;
}
}
// Audio filter for the capture path. Incoming audio is re-cut into 10 ms chunks
// and each chunk is run through the native voice processor; playback audio
// received from an EchoProbe is fed to the same native instance as the reverse
// (echo reference) stream. If a StreamVolume is available, the gain level
// suggested by the native processor is applied to it.
public class Dino.Plugins.Rtp.VoiceProcessor : Audio.Filter {
// Both pads only accept 48 kHz, mono, interleaved S16LE raw audio.
private static StaticPadTemplate sink_template = {"sink", PadDirection.SINK, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
private static StaticPadTemplate src_template = {"src", PadDirection.SRC, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
// Audio format most recently passed to setup().
public Audio.Info audio_info { get; private set; }
// Handler id of the echo_probe.on_new_buffer connection; 0 while not connected.
private ulong process_outgoing_buffer_handler_id;
// Source id of the periodic adjust_delay() timeout; 0 while not scheduled.
private uint adjust_delay_timeout_id;
// Samples per chunk and the corresponding size in bytes; both are computed in setup().
private uint period_samples;
private uint period_size;
// Accumulates captured audio until a full chunk is available.
private Base.Adapter adapter = new Base.Adapter();
private EchoProbe? echo_probe;
private Audio.StreamVolume? stream_volume;
// PTS of the most recent playback buffer that carried a valid timestamp.
// NOTE(review): only written, never read inside this class.
private ClockTime last_reverse;
// Opaque handle to the native processor state; valid between start() and stop().
private void* native;
static construct {
add_static_pad_template(sink_template);
add_static_pad_template(src_template);
set_static_metadata("Voice Processor (AGC, AEC, filters, etc.)", "Generic/Audio", "Pre-processes voice with WebRTC Audio Processing Library", "Dino Team <contact@dino.im>");
}
construct {
// Buffers are modified, so the base class must not run in passthrough mode.
set_passthrough(false);
}
// echo_probe: optional source of playback audio used as the echo reference.
// stream_volume: optional volume control that receives the suggested gain level.
public VoiceProcessor(EchoProbe? echo_probe = null, Audio.StreamVolume? stream_volume = null) {
this.echo_probe = echo_probe;
this.stream_volume = stream_volume;
}
// Bindings to the functions implemented in voice_processor_native.cpp.
private static extern void* init_native(int stream_delay);
private static extern void setup_native(void* native);
private static extern void destroy_native(void* native);
private static extern void analyze_reverse_stream(void* native, Audio.Info info, Buffer buffer);
private static extern void process_stream(void* native, Audio.Info info, Buffer buffer);
private static extern void adjust_stream_delay(void* native);
private static extern void notify_gain_level(void* native, int gain_level);
private static extern int get_suggested_gain_level(void* native);
// NOTE(review): declared but not called anywhere in this class.
private static extern bool get_stream_has_voice(void* native);
// Stores the format, derives the chunk size, discards queued audio and
// (re)configures the native processor.
public override bool setup(Audio.Info info) {
debug("VoiceProcessor.setup(%s)", info.to_caps().to_string());
audio_info = info;
period_samples = info.rate / 100; // 10ms buffers
period_size = period_samples * info.bpf;
adapter.clear();
setup_native(native);
return true;
}
// Creates the native processor and hooks up the echo probe and volume control.
public override bool start() {
// 150 is the initial stream delay handed to the native side, which passes it to
// set_stream_delay_ms() and may adjust it later via adjust_stream_delay().
native = init_native(150);
if (process_outgoing_buffer_handler_id == 0 && echo_probe != null) {
process_outgoing_buffer_handler_id = echo_probe.on_new_buffer.connect(process_outgoing_buffer);
}
// Without an explicit volume control, fall back to the element linked to the
// sink pad if it implements StreamVolume.
if (stream_volume == null && sinkpad.get_peer() != null && sinkpad.get_peer().get_parent_element() is Audio.StreamVolume) {
stream_volume = sinkpad.get_peer().get_parent_element() as Audio.StreamVolume;
}
return true;
}
// Timeout callback: lets the native side re-evaluate the stream delay. The
// timeout removes itself once the native processor is gone.
private bool adjust_delay() {
if (native != null) {
adjust_stream_delay(native);
return Source.CONTINUE;
} else {
adjust_delay_timeout_id = 0;
return Source.REMOVE;
}
}
// Handler for echo_probe.on_new_buffer: feeds one chunk of playback audio to the
// native processor as reverse stream.
private void process_outgoing_buffer(Buffer buffer) {
// uint64.MAX marks a buffer without a valid timestamp.
if (buffer.pts != uint64.MAX) {
last_reverse = buffer.pts;
}
analyze_reverse_stream(native, echo_probe.audio_info, buffer);
// Start the periodic delay adjustment (every 5000 ms) once playback audio arrives.
if (adjust_delay_timeout_id == 0 && echo_probe != null) {
adjust_delay_timeout_id = Timeout.add(5000, adjust_delay);
}
}
// Queues a running-time-stamped copy of the captured buffer; queued audio is
// discarded first when the input is discontinuous.
public override FlowReturn submit_input_buffer(bool is_discont, Buffer input) {
lock (adapter) {
if (is_discont) {
adapter.clear();
}
adapter.push(adjust_to_running_time(this, input));
}
return FlowReturn.OK;
}
// Produces at most one processed chunk per call.
// NOTE(review): output_buffer is not assigned explicitly when less than one
// period is queued — confirm the resulting value is what the base class expects.
public override FlowReturn generate_output(out Buffer output_buffer) {
lock (adapter) {
if (adapter.available() >= period_size) {
output_buffer = (Gst.Buffer) adapter.take_buffer(period_size).make_writable();
int old_gain_level = 0;
if (stream_volume != null) {
// The native gain control works on levels 0..255, so the linear volume is scaled by 255.
old_gain_level = (int) (stream_volume.get_volume(Audio.StreamVolumeFormat.LINEAR) * 255.0);
notify_gain_level(native, old_gain_level);
}
process_stream(native, audio_info, output_buffer);
if (stream_volume != null) {
// Apply the level suggested by the native gain control if it changed.
int new_gain_level = get_suggested_gain_level(native);
if (old_gain_level != new_gain_level) {
debug("Gain: %i -> %i", old_gain_level, new_gain_level);
stream_volume.set_volume(Audio.StreamVolumeFormat.LINEAR, ((double)new_gain_level) / 255.0);
}
}
}
}
return FlowReturn.OK;
}
// Undoes start(): disconnects from the echo probe, cancels the delay timer,
// drops queued audio and destroys the native processor.
public override bool stop() {
if (process_outgoing_buffer_handler_id != 0) {
echo_probe.disconnect(process_outgoing_buffer_handler_id);
process_outgoing_buffer_handler_id = 0;
}
if (adjust_delay_timeout_id != 0) {
Source.remove(adjust_delay_timeout_id);
adjust_delay_timeout_id = 0;
}
adapter.clear();
destroy_native(native);
native = null;
return true;
}
}

View file

@ -0,0 +1,141 @@
#include <algorithm>
#include <gst/gst.h>
#include <gst/audio/audio.h>
#include <webrtc/modules/audio_processing/include/audio_processing.h>
#include <webrtc/modules/interface/module_common_types.h>
#include <webrtc/system_wrappers/include/trace.h>
#define SAMPLE_RATE 48000
#define SAMPLE_CHANNELS 1
// Native state behind the opaque pointer that is handed back and forth across the C interface.
struct _DinoPluginsRtpVoiceProcessorNative {
// WebRTC audio processing instance, created in ..._init_native().
webrtc::AudioProcessing *apm;
// Value passed to set_stream_delay_ms() before every ProcessStream() call.
gint stream_delay;
};
// Returns a copy of `buffer` whose PTS has been translated into running time
// using the segment of `transform`. The input buffer itself is left untouched.
extern "C" void *dino_plugins_rtp_adjust_to_running_time(GstBaseTransform *transform, GstBuffer *buffer) {
    const GstClockTime running_pts =
            gst_segment_to_running_time(&transform->segment, GST_FORMAT_TIME, GST_BUFFER_PTS(buffer));
    GstBuffer *adjusted = gst_buffer_copy(buffer);
    GST_BUFFER_PTS(adjusted) = running_pts;
    return adjusted;
}
// Allocates the native state and creates the WebRTC audio processing instance
// with the extended filter and the experimental AGC (start volume 85) enabled.
// `stream_delay` is stored as the initial delay reported to the processor.
// The returned pointer must be released with ..._destroy_native().
extern "C" void *dino_plugins_rtp_voice_processor_init_native(gint stream_delay) {
    webrtc::Config apm_config;
    apm_config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
    apm_config.Set<webrtc::ExperimentalAgc>(new webrtc::ExperimentalAgc(true, 85));

    auto *state = new _DinoPluginsRtpVoiceProcessorNative();
    state->apm = webrtc::AudioProcessing::Create(apm_config);
    state->stream_delay = stream_delay;
    return state;
}
extern "C" void dino_plugins_rtp_voice_processor_setup_native(void *native_ptr) {
_DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
webrtc::AudioProcessing *apm = native->apm;
webrtc::ProcessingConfig pconfig;
pconfig.streams[webrtc::ProcessingConfig::kInputStream] =
webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false);
pconfig.streams[webrtc::ProcessingConfig::kOutputStream] =
webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false);
pconfig.streams[webrtc::ProcessingConfig::kReverseInputStream] =
webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false);
pconfig.streams[webrtc::ProcessingConfig::kReverseOutputStream] =
webrtc::StreamConfig(SAMPLE_RATE, SAMPLE_CHANNELS, false);
apm->Initialize(pconfig);
apm->high_pass_filter()->Enable(true);
apm->echo_cancellation()->enable_drift_compensation(false);
apm->echo_cancellation()->set_suppression_level(webrtc::EchoCancellation::kModerateSuppression);
apm->echo_cancellation()->enable_delay_logging(true);
apm->echo_cancellation()->Enable(true);
apm->noise_suppression()->set_level(webrtc::NoiseSuppression::kModerate);
apm->noise_suppression()->Enable(true);
apm->gain_control()->set_analog_level_limits(0, 255);
apm->gain_control()->set_mode(webrtc::GainControl::kAdaptiveAnalog);
apm->gain_control()->set_target_level_dbfs(3);
apm->gain_control()->set_compression_gain_db(9);
apm->gain_control()->enable_limiter(true);
apm->gain_control()->Enable(true);
apm->voice_detection()->set_likelihood(webrtc::VoiceDetection::Likelihood::kLowLikelihood);
apm->voice_detection()->Enable(true);
}
extern "C" void
dino_plugins_rtp_voice_processor_analyze_reverse_stream(void *native_ptr, GstAudioInfo *info, GstBuffer *buffer) {
_DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false);
webrtc::AudioProcessing *apm = native->apm;
GstAudioBuffer audio_buffer;
gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READ);
webrtc::AudioFrame frame;
frame.num_channels_ = info->channels;
frame.sample_rate_hz_ = info->rate;
frame.samples_per_channel_ = gst_buffer_get_size(buffer) / info->bpf;
memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
int err = apm->AnalyzeReverseStream(&frame);
if (err < 0) g_warning("ProcessReverseStream %i", err);
gst_audio_buffer_unmap(&audio_buffer);
}
extern "C" void dino_plugins_rtp_voice_processor_notify_gain_level(void *native_ptr, gint gain_level) {
_DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
webrtc::AudioProcessing *apm = native->apm;
apm->gain_control()->set_stream_analog_level(gain_level);
}
// Returns the analog level currently reported by the gain control.
extern "C" gint dino_plugins_rtp_voice_processor_get_suggested_gain_level(void *native_ptr) {
    auto *state = static_cast<_DinoPluginsRtpVoiceProcessorNative *>(native_ptr);
    auto *gain = state->apm->gain_control();
    return gain->stream_analog_level();
}
extern "C" bool dino_plugins_rtp_voice_processor_get_stream_has_voice(void *native_ptr) {
_DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
webrtc::AudioProcessing *apm = native->apm;
return apm->voice_detection()->stream_has_voice();
}
extern "C" void dino_plugins_rtp_voice_processor_adjust_stream_delay(void *native_ptr) {
_DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
webrtc::AudioProcessing *apm = native->apm;
int median, std;
float fraction_poor_delays;
apm->echo_cancellation()->GetDelayMetrics(&median, &std, &fraction_poor_delays);
if (fraction_poor_delays < 0) return;
g_debug("voice_processor_native.cpp: Stream delay metrics: %i %i %f", median, std, fraction_poor_delays);
if (fraction_poor_delays > 0.5) {
native->stream_delay = std::max(0, native->stream_delay + std::min(-10, std::max(median, 10)));
g_debug("Adjusted stream delay %i", native->stream_delay);
}
}
extern "C" void
dino_plugins_rtp_voice_processor_process_stream(void *native_ptr, GstAudioInfo *info, GstBuffer *buffer) {
_DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false);
webrtc::AudioProcessing *apm = native->apm;
GstAudioBuffer audio_buffer;
gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READWRITE);
webrtc::AudioFrame frame;
frame.num_channels_ = info->channels;
frame.sample_rate_hz_ = info->rate;
frame.samples_per_channel_ = info->rate / 100;
memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
apm->set_stream_delay_ms(native->stream_delay);
int err = apm->ProcessStream(&frame);
if (err >= 0) memcpy(audio_buffer.planes[0], frame.data_, frame.samples_per_channel_ * info->bpf);
if (err < 0) g_warning("ProcessStream %i", err);
gst_audio_buffer_unmap(&audio_buffer);
}
extern "C" void dino_plugins_rtp_voice_processor_destroy_native(void *native_ptr) {
_DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
delete native;
}