Commit 7d116ce6 authored by Erwan Croze, committed by Simon Morlat

Remove VAD from MSVolume filter + Add VAD interface + Add mswebrtc vad tests

parent 9cd54b7c
...@@ -85,6 +85,7 @@ enum _MSFilterInterfaceId{ ...@@ -85,6 +85,7 @@ enum _MSFilterInterfaceId{
MSFilterAudioCaptureInterface,/**<Interface for audio capture filters*/ MSFilterAudioCaptureInterface,/**<Interface for audio capture filters*/
MSFilterAudioPlaybackInterface,/**<Interface for audio playback filters.*/ MSFilterAudioPlaybackInterface,/**<Interface for audio playback filters.*/
MSFilterAudioEncoderInterface,/**<Audio encoder interface*/ MSFilterAudioEncoderInterface,/**<Audio encoder interface*/
MSFilterVADInterface,/**<Voice activity detection interface*/
MSFilterVoidInterface,/**<Void source/sink interface*/ MSFilterVoidInterface,/**<Void source/sink interface*/
}; };
......
...@@ -333,6 +333,25 @@ typedef enum _MSAudioRoute MSAudioRoute; ...@@ -333,6 +333,25 @@ typedef enum _MSAudioRoute MSAudioRoute;
#define MS_AUDIO_ENCODER_GET_CAPABILITIES \ #define MS_AUDIO_ENCODER_GET_CAPABILITIES \
MS_FILTER_METHOD(MSFilterAudioEncoderInterface,4,int) MS_FILTER_METHOD(MSFilterAudioEncoderInterface,4,int)
/** Interface definitions for VAD (voice activity detection) filters */
/* Enable or disable silence detection. Takes an int (the tester passes 1 to enable it). */
#define MS_VAD_ENABLE_SILENCE_DETECTION \
MS_FILTER_METHOD(MSFilterVADInterface, 0, int)
/* Set the silence duration threshold in ms (unsigned int argument) */
#define MS_VAD_SET_SILENCE_DURATION_THRESHOLD \
MS_FILTER_METHOD(MSFilterVADInterface, 1, unsigned int)
/* Set the detection mode (int argument). The meaning of the value is specific to each VAD implementation */
#define MS_VAD_SET_MODE \
MS_FILTER_METHOD(MSFilterVADInterface, 2, int)
/* Event, without argument, notified when a silence is detected.
 * NOTE(review): presumably sent once the silence has lasted for the configured
 * duration threshold - confirm against the VAD filter implementation. */
#define MS_VAD_EVENT_SILENCE_DETECTED \
MS_FILTER_EVENT_NO_ARG(MSFilterVADInterface, 0)
/* Notified at the end of a silence; the argument gives the silence duration in ms.
 * NOTE(review): the payload is declared as unsigned int here, while the tester
 * callback dereferences it as uint64_t - confirm which width is intended. */
#define MS_VAD_EVENT_SILENCE_ENDED \
MS_FILTER_EVENT(MSFilterVADInterface, 1, unsigned int)
/** Interface definitions for void source/sink */ /** Interface definitions for void source/sink */
#define MS_VOID_SOURCE_SEND_SILENCE \ #define MS_VOID_SOURCE_SEND_SILENCE \
MS_FILTER_METHOD(MSFilterVoidInterface, 0, bool_t) MS_FILTER_METHOD(MSFilterVoidInterface, 0, bool_t)
......
...@@ -81,21 +81,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ...@@ -81,21 +81,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#define MS_VOLUME_DB_LOWEST (-120) /*arbitrary value returned when linear volume is 0*/ #define MS_VOLUME_DB_LOWEST (-120) /*arbitrary value returned when linear volume is 0*/
/**
 * Enable/disable silence detection.
 * Takes an int, which the volume filter stores as its silence detection flag.
 **/
#define MS_VOLUME_ENABLE_SILENCE_DETECTION MS_FILTER_METHOD(MS_VOLUME_ID,20,int)
/**
 * Set the silence threshold duration, in milliseconds (unsigned int argument).
 * Silence processing is skipped while this threshold is 0.
 **/
#define MS_VOLUME_SET_SILENCE_DURATION_THRESHOLD MS_FILTER_METHOD(MS_VOLUME_ID,21,unsigned int)
/**
 * Event, without argument, triggered when no voice has been detected for the
 * configured threshold duration. It is sent once, and is re-armed when voice
 * is detected again.
 **/
#define MS_VOLUME_EVENT_SILENCE_DETECTED MS_FILTER_EVENT_NO_ARG(MS_VOLUME_ID,0)
extern MSFilterDesc ms_volume_desc; extern MSFilterDesc ms_volume_desc;
#endif #endif
...@@ -61,10 +61,6 @@ typedef struct Volume{ ...@@ -61,10 +61,6 @@ typedef struct Volume{
float target_gain; /*the target gain chosen by echo limiter and noise gate*/ float target_gain; /*the target gain chosen by echo limiter and noise gate*/
int sustain_time; /* time in ms for which echo limiter remains active after resuming from speech to silence.*/ int sustain_time; /* time in ms for which echo limiter remains active after resuming from speech to silence.*/
int sustain_dur; int sustain_dur;
unsigned int silence_duration; // silence threshold duration in ms
int silence_detection_enable; // silence detection enabled information
int silence_event_send;
uint64_t last_voice_detection; // last time voice was detected
MSFilter *peer; MSFilter *peer;
#ifdef HAVE_SPEEXDSP #ifdef HAVE_SPEEXDSP
SpeexPreprocessState *speex_pp; SpeexPreprocessState *speex_pp;
...@@ -111,10 +107,6 @@ static void volume_init(MSFilter *f){ ...@@ -111,10 +107,6 @@ static void volume_init(MSFilter *f){
v->ng_floorgain=min_ng_floorgain; v->ng_floorgain=min_ng_floorgain;
v->ng_gain = 1; v->ng_gain = 1;
v->remove_dc=FALSE; v->remove_dc=FALSE;
v->silence_detection_enable = 0;
v->silence_duration = 0;
v->last_voice_detection = 0;
v->silence_event_send = 0;
#ifdef HAVE_SPEEXDSP #ifdef HAVE_SPEEXDSP
v->speex_pp=NULL; v->speex_pp=NULL;
#endif #endif
...@@ -197,25 +189,6 @@ static float volume_agc_process(MSFilter *f, mblk_t *om) { ...@@ -197,25 +189,6 @@ static float volume_agc_process(MSFilter *f, mblk_t *om) {
#endif #endif
/*
 * Silence detection step of the volume filter (only active when built with
 * HAVE_SPEEXDSP; otherwise this is a no-op).
 *
 * Runs the Speex preprocessor on the samples of `om` and tracks when voice
 * was last reported. Once no voice has been reported for at least
 * `silence_duration` ms of ticker time, MS_VOLUME_EVENT_SILENCE_DETECTED is
 * notified a single time; the notification is re-armed by the next voice
 * detection.
 */
static void volume_vad_process(MSFilter *f, mblk_t *om) {
#ifdef HAVE_SPEEXDSP
	Volume *vol = (Volume *)f->data;

	/* Nothing to do without a preprocessor state or with a zero threshold. */
	if (!vol->speex_pp || vol->silence_duration == 0) return;

	/* A non-zero return from the preprocessor is treated as "voice detected". */
	if (speex_preprocess_run(vol->speex_pp, (int16_t *)om->b_rptr)) {
		vol->last_voice_detection = f->ticker->time;
		vol->silence_event_send = 0;
		return;
	}

	/* Still inside the tolerated silence window: keep waiting. */
	if ((vol->last_voice_detection + vol->silence_duration) > f->ticker->time) return;

	/* Threshold reached: notify only if not already done for this silence. */
	if (!vol->silence_event_send) {
		ms_filter_notify_no_arg(f, MS_VOLUME_EVENT_SILENCE_DETECTED);
		ms_message("Silence event sent.");
	}
	vol->silence_event_send = 1;
#endif
}
static MS2_INLINE float compute_gain(Volume *v, float energy, float weight) { static MS2_INLINE float compute_gain(Volume *v, float energy, float weight) {
float ret = v->static_gain / (1 + (energy * weight)); float ret = v->static_gain / (1 + (energy * weight));
return ret; return ret;
...@@ -414,18 +387,6 @@ static int volume_remove_dc(MSFilter *f, void *arg){ ...@@ -414,18 +387,6 @@ static int volume_remove_dc(MSFilter *f, void *arg){
return 0; return 0;
} }
/*
 * Filter method handler: store the int pointed to by `arg` as the filter's
 * silence detection flag. Always returns 0.
 */
static int volume_enable_silence_detection(MSFilter *f, void *arg) {
	Volume *vol = (Volume *)f->data;
	int enabled = *(int *)arg;

	vol->silence_detection_enable = enabled;
	return 0;
}
/*
 * Filter method handler: store the unsigned int pointed to by `arg` as the
 * silence threshold duration (in ms) of the filter. Always returns 0.
 */
static int volume_set_silence_duration_threshold(MSFilter *f, void *arg) {
	Volume *vol = (Volume *)f->data;
	unsigned int threshold_ms = *(unsigned int *)arg;

	vol->silence_duration = threshold_ms;
	return 0;
}
static MS2_INLINE int16_t saturate(int val) { static MS2_INLINE int16_t saturate(int val) {
return (val>32767) ? 32767 : ( (val<-32767) ? -32767 : val); return (val>32767) ? 32767 : ( (val<-32767) ? -32767 : val);
} }
...@@ -508,15 +469,12 @@ static void volume_preprocess(MSFilter *f){ ...@@ -508,15 +469,12 @@ static void volume_preprocess(MSFilter *f){
ms_message("AGC is enabled."); ms_message("AGC is enabled.");
} }
#if defined HAVE_SPEEXDSP && !defined MS_FIXED_POINT #if defined HAVE_SPEEXDSP && !defined MS_FIXED_POINT
if (v->speex_pp==NULL && (v->agc_enabled || v->silence_detection_enable)){ if (v->speex_pp==NULL && v->agc_enabled){
int tmp=1; int tmp=1;
v->speex_pp=speex_preprocess_state_init(v->nsamples,v->sample_rate); v->speex_pp=speex_preprocess_state_init(v->nsamples,v->sample_rate);
if (v->agc_enabled && speex_preprocess_ctl(v->speex_pp,SPEEX_PREPROCESS_SET_AGC,&tmp)==-1){ if (v->agc_enabled && speex_preprocess_ctl(v->speex_pp,SPEEX_PREPROCESS_SET_AGC,&tmp)==-1){
ms_warning("Speex AGC is not available."); ms_warning("Speex AGC is not available.");
} }
if (v->silence_detection_enable && speex_preprocess_ctl(v->speex_pp,SPEEX_PREPROCESS_SET_VAD,&tmp)==-1) {
ms_warning("Speex VAD is not available.");
}
tmp=0; tmp=0;
speex_preprocess_ctl(v->speex_pp,SPEEX_PREPROCESS_SET_DENOISE,&tmp); speex_preprocess_ctl(v->speex_pp,SPEEX_PREPROCESS_SET_DENOISE,&tmp);
speex_preprocess_ctl(v->speex_pp,SPEEX_PREPROCESS_SET_DEREVERB,&tmp); speex_preprocess_ctl(v->speex_pp,SPEEX_PREPROCESS_SET_DEREVERB,&tmp);
...@@ -553,7 +511,6 @@ static void volume_process(MSFilter *f){ ...@@ -553,7 +511,6 @@ static void volume_process(MSFilter *f){
* remote speaker. AGC operates fully, too (local speaker close to local mic!); * remote speaker. AGC operates fully, too (local speaker close to local mic!);
* having agc gain reduction also contribute to total reduction makes sense. * having agc gain reduction also contribute to total reduction makes sense.
*/ */
if (v->silence_detection_enable) volume_vad_process(f, m);
if (v->agc_enabled) target_gain/= volume_agc_process(f, m); if (v->agc_enabled) target_gain/= volume_agc_process(f, m);
if (v->noise_gate_enabled) volume_noise_gate_process(v, v->instant_energy, m); if (v->noise_gate_enabled) volume_noise_gate_process(v, v->instant_energy, m);
apply_gain(v, m, target_gain); apply_gain(v, m, target_gain);
...@@ -565,7 +522,6 @@ static void volume_process(MSFilter *f){ ...@@ -565,7 +522,6 @@ static void volume_process(MSFilter *f){
update_energy(v,(int16_t*)m->b_rptr, (int)((m->b_wptr - m->b_rptr) / 2), f->ticker->time); update_energy(v,(int16_t*)m->b_rptr, (int)((m->b_wptr - m->b_rptr) / 2), f->ticker->time);
target_gain = v->static_gain; target_gain = v->static_gain;
if (v->silence_detection_enable) volume_vad_process(f, m);
if (v->noise_gate_enabled) volume_noise_gate_process(v, v->instant_energy, m); if (v->noise_gate_enabled) volume_noise_gate_process(v, v->instant_energy, m);
apply_gain(v, m, target_gain); apply_gain(v, m, target_gain);
ms_queue_put(f->outputs[0],m); ms_queue_put(f->outputs[0],m);
...@@ -594,8 +550,6 @@ static MSFilterMethod methods[]={ ...@@ -594,8 +550,6 @@ static MSFilterMethod methods[]={
{ MS_VOLUME_REMOVE_DC, volume_remove_dc }, { MS_VOLUME_REMOVE_DC, volume_remove_dc },
{ MS_VOLUME_GET_MIN , volume_get_min }, { MS_VOLUME_GET_MIN , volume_get_min },
{ MS_VOLUME_GET_MAX , volume_get_max }, { MS_VOLUME_GET_MAX , volume_get_max },
{ MS_VOLUME_ENABLE_SILENCE_DETECTION , volume_enable_silence_detection},
{ MS_VOLUME_SET_SILENCE_DURATION_THRESHOLD , volume_set_silence_duration_threshold},
{ 0 , NULL } { 0 , NULL }
}; };
......
...@@ -48,7 +48,11 @@ set(SOUND_FILES ...@@ -48,7 +48,11 @@ set(SOUND_FILES
sounds/sintel_trailer_opus_h264.mkv sounds/sintel_trailer_opus_h264.mkv
sounds/sintel_trailer_opus_vp8.mkv sounds/sintel_trailer_opus_vp8.mkv
sounds/sintel_trailer_pcmu_h264.mkv sounds/sintel_trailer_pcmu_h264.mkv
sounds/test_silence_voice.wav sounds/test_silence_voice_48000.wav
sounds/test_silence_voice_44100.wav
sounds/test_silence_voice_32000.wav
sounds/test_silence_voice_16000.wav
sounds/test_silence_voice_8000.wav
) )
set(SCENARIO_FILES set(SCENARIO_FILES
......
...@@ -50,19 +50,24 @@ static void tone_detected_cb(void *data, MSFilter *f, unsigned int event_id, MST ...@@ -50,19 +50,24 @@ static void tone_detected_cb(void *data, MSFilter *f, unsigned int event_id, MST
ms_tester_tone_detected = TRUE; ms_tester_tone_detected = TRUE;
} }
#if 0 //Remove this test until we found a good implem of VAD #define TEST_SILENCE_VOICE_48000_FILE_NAME "sounds/test_silence_voice_48000.wav"
#ifdef HAVE_SPEEXDSP #define TEST_SILENCE_VOICE_44100_FILE_NAME "sounds/test_silence_voice_44100.wav"
#define TEST_SILENCE_VOICE_32000_FILE_NAME "sounds/test_silence_voice_32000.wav"
#define TEST_SILENCE_VOICE_FILE_NAME "sounds/test_silence_voice.wav" #define TEST_SILENCE_VOICE_16000_FILE_NAME "sounds/test_silence_voice_16000.wav"
#define TEST_SILENCE_VOICE_8000_FILE_NAME "sounds/test_silence_voice_8000.wav"
#define MSWEBRTC_VAD_FILTER_NAME "MSWebRtcVADDec"
typedef struct struct_silence_callback_data { typedef struct struct_silence_callback_data {
int voice_detected_number; int voice_detected_number;
uint64_t silence_duration[10];
} silence_callback_data; } silence_callback_data;
static void silence_detected_cb(void *data, MSFilter *f, unsigned int event_id, void *arg) { static void silence_detected_cb(void *data, MSFilter *f, unsigned int event_id, void *arg) {
if (event_id == MS_VOLUME_EVENT_SILENCE_DETECTED) { silence_callback_data *silence = (silence_callback_data *)data;
silence_callback_data *silence = (silence_callback_data *)data; if (event_id == MS_VAD_EVENT_SILENCE_DETECTED) {
silence->voice_detected_number++; silence->voice_detected_number++;
} else if (event_id == MS_VAD_EVENT_SILENCE_ENDED) {
silence->silence_duration[silence->voice_detected_number-1] = *(uint64_t*)arg;
} }
} }
...@@ -77,15 +82,23 @@ static void player_cb(void *data, MSFilter *f, unsigned int event_id, void *arg) ...@@ -77,15 +82,23 @@ static void player_cb(void *data, MSFilter *f, unsigned int event_id, void *arg)
} }
} }
static void silence_detection(void) { // Waiting time in ms
static void _silence_detection(const char* filename, unsigned int duration_threshold, uint64_t* silence_duration, uint64_t delay, int vad_mode, int number_detection, int waiting_time) {
int sample_rate;
MSConnectionHelper h; MSConnectionHelper h;
silence_callback_data silence_data; silence_callback_data silence_data;
player_callback_data player_data; player_callback_data player_data;
MSFilter *voice_detector; MSFilter *voice_detector;
unsigned int filter_mask = FILTER_MASK_FILEPLAY | FILTER_MASK_VOIDSINK; unsigned int filter_mask = FILTER_MASK_FILEPLAY | FILTER_MASK_VOIDSINK;
char* recorded_file = bc_tester_res(TEST_SILENCE_VOICE_FILE_NAME);
unsigned int enable_silence = 1; unsigned int enable_silence = 1;
unsigned int duration_threshold = 1000; char* recorded_file = bc_tester_res(filename);
MSFilterDesc *vad_desc = ms_factory_lookup_filter_by_name(msFactory, MSWEBRTC_VAD_FILTER_NAME);
if (!recorded_file) return;
//Skip test if mswebrtc vad plugin not loaded
if (vad_desc == NULL) goto end;
silence_data.voice_detected_number = 0; silence_data.voice_detected_number = 0;
player_data.end_of_file = FALSE; player_data.end_of_file = FALSE;
...@@ -93,15 +106,18 @@ static void silence_detection(void) { ...@@ -93,15 +106,18 @@ static void silence_detection(void) {
ms_tester_create_ticker(); ms_tester_create_ticker();
ms_tester_create_filters(filter_mask, msFactory); ms_tester_create_filters(filter_mask, msFactory);
voice_detector = ms_factory_create_filter(msFactory, MS_VOLUME_ID); voice_detector = ms_factory_create_filter_from_desc(msFactory, vad_desc);
ms_filter_add_notify_callback(voice_detector, silence_detected_cb, &silence_data, TRUE); ms_filter_add_notify_callback(voice_detector, silence_detected_cb, &silence_data, TRUE);
ms_filter_add_notify_callback(ms_tester_fileplay, player_cb, &player_data, TRUE); ms_filter_add_notify_callback(ms_tester_fileplay, player_cb, &player_data, TRUE);
ms_filter_call_method(ms_tester_fileplay, MS_FILE_PLAYER_OPEN, recorded_file); ms_filter_call_method(ms_tester_fileplay, MS_FILE_PLAYER_OPEN, recorded_file);
ms_filter_call_method_noarg(ms_tester_fileplay, MS_FILE_PLAYER_START); ms_filter_call_method_noarg(ms_tester_fileplay, MS_FILE_PLAYER_START);
ms_filter_call_method(ms_tester_fileplay, MS_FILTER_GET_SAMPLE_RATE, &sample_rate);
ms_filter_call_method(voice_detector, MS_VOLUME_ENABLE_SILENCE_DETECTION, (void*)&enable_silence); ms_filter_call_method(voice_detector, MS_VAD_ENABLE_SILENCE_DETECTION, (void*)&enable_silence);
ms_filter_call_method(voice_detector, MS_VOLUME_SET_SILENCE_DURATION_THRESHOLD, (void*)&duration_threshold); ms_filter_call_method(voice_detector, MS_VAD_SET_SILENCE_DURATION_THRESHOLD, (void*)&duration_threshold);
ms_filter_call_method(voice_detector, MS_FILTER_SET_SAMPLE_RATE, (void*)&sample_rate);
ms_filter_call_method(voice_detector, MS_VAD_SET_MODE, (void*)&vad_mode);
ms_connection_helper_start(&h); ms_connection_helper_start(&h);
ms_connection_helper_link(&h, ms_tester_fileplay, -1, 0); ms_connection_helper_link(&h, ms_tester_fileplay, -1, 0);
...@@ -109,9 +125,15 @@ static void silence_detection(void) { ...@@ -109,9 +125,15 @@ static void silence_detection(void) {
ms_connection_helper_link(&h, ms_tester_voidsink, 0, -1); ms_connection_helper_link(&h, ms_tester_voidsink, 0, -1);
ms_ticker_attach(ms_tester_ticker, ms_tester_fileplay); ms_ticker_attach(ms_tester_ticker, ms_tester_fileplay);
BC_ASSERT_TRUE(wait_for_until(NULL, NULL, &player_data.end_of_file, TRUE, 26000)); BC_ASSERT_TRUE(wait_for_until(NULL, NULL, &player_data.end_of_file, TRUE, waiting_time));
// TODO: choose a better example and check how many silences should be detected
BC_ASSERT_EQUAL(silence_data.voice_detected_number, 2, int, "%d"); BC_ASSERT_EQUAL(silence_data.voice_detected_number, number_detection, int, "%d");
if (number_detection > 0 && number_detection == silence_data.voice_detected_number) {
for (int i = 0 ; i < number_detection-1 ; i++) {
BC_ASSERT_LOWER_STRICT((unsigned long long)silence_data.silence_duration[i], (unsigned long long)(silence_duration[i] + delay), unsigned long long, "%llu");
BC_ASSERT_GREATER_STRICT((unsigned long long)silence_data.silence_duration[i], (unsigned long long)(silence_duration[i] - delay), unsigned long long, "%llu");
}
}
ms_filter_call_method_noarg(ms_tester_fileplay, MS_FILE_PLAYER_CLOSE); ms_filter_call_method_noarg(ms_tester_fileplay, MS_FILE_PLAYER_CLOSE);
ms_ticker_detach(ms_tester_ticker, ms_tester_fileplay); ms_ticker_detach(ms_tester_ticker, ms_tester_fileplay);
...@@ -128,10 +150,33 @@ static void silence_detection(void) { ...@@ -128,10 +150,33 @@ static void silence_detection(void) {
ms_tester_destroy_filters(filter_mask); ms_tester_destroy_filters(filter_mask);
ms_tester_destroy_ticker(); ms_tester_destroy_ticker();
end:
ms_free(recorded_file); ms_free(recorded_file);
} }
#endif
#endif //if 0 static void silence_detection_48000(void) {
uint64_t duration[5] = {3710, 2210, 1780, 5290};
_silence_detection(TEST_SILENCE_VOICE_48000_FILE_NAME, 1000, duration, 50, 3, 5, 26000);
}
/*
 * Silence detection scenario on the test_silence_voice_44100.wav resource.
 * No silence detection is expected for this file, so no expected durations
 * are supplied.
 * NOTE(review): presumably because the VAD under test does not handle this
 * sample rate - confirm.
 */
static void silence_detection_44100(void) {
	_silence_detection(TEST_SILENCE_VOICE_44100_FILE_NAME,
			1000,   /* duration_threshold */
			NULL,   /* silence_duration: nothing to compare */
			0,      /* delay */
			3,      /* vad_mode */
			0,      /* number_detection */
			26000); /* waiting_time, in ms */
}
/*
 * Silence detection scenario on the test_silence_voice_32000.wav resource.
 * Six detections are expected; the helper compares the first five measured
 * silence durations with the values below, within the given delay.
 */
static void silence_detection_32000(void) {
	uint64_t expected_ms[6] = {3710, 2210, 1780, 1050, 5290};

	_silence_detection(TEST_SILENCE_VOICE_32000_FILE_NAME,
			1000,        /* duration_threshold */
			expected_ms, /* silence_duration */
			50,          /* delay */
			3,           /* vad_mode */
			6,           /* number_detection */
			26000);      /* waiting_time, in ms */
}
/*
 * Silence detection scenario on the test_silence_voice_16000.wav resource.
 * Six detections are expected; the helper compares the first five measured
 * silence durations with the values below, within the given delay.
 */
static void silence_detection_16000(void) {
	uint64_t expected_ms[6] = {3710, 2210, 1780, 1050, 5290};

	_silence_detection(TEST_SILENCE_VOICE_16000_FILE_NAME,
			1000,        /* duration_threshold */
			expected_ms, /* silence_duration */
			50,          /* delay */
			3,           /* vad_mode */
			6,           /* number_detection */
			26000);      /* waiting_time, in ms */
}
/*
 * Silence detection scenario on the test_silence_voice_8000.wav resource.
 * Six detections are expected; the helper compares the first five measured
 * silence durations with the values below, within the given delay.
 */
static void silence_detection_8000(void) {
	uint64_t expected_ms[6] = {3710, 2210, 1780, 1050, 5290};

	_silence_detection(TEST_SILENCE_VOICE_8000_FILE_NAME,
			1000,        /* duration_threshold */
			expected_ms, /* silence_duration */
			50,          /* delay */
			3,           /* vad_mode */
			6,           /* number_detection */
			26000);      /* waiting_time, in ms */
}
static void dtmfgen_tonedet(void) { static void dtmfgen_tonedet(void) {
MSConnectionHelper h; MSConnectionHelper h;
...@@ -489,11 +534,11 @@ static void dtmfgen_filerec_fileplay_tonedet(void) { ...@@ -489,11 +534,11 @@ static void dtmfgen_filerec_fileplay_tonedet(void) {
test_t basic_audio_tests[] = { test_t basic_audio_tests[] = {
#if 0 //Remove this test until we found a good implem of VAD TEST_ONE_TAG("silence detection 48000", silence_detection_48000, "VAD"),
#ifdef HAVE_SPEEXDSP TEST_ONE_TAG("silence detection 44100", silence_detection_44100, "VAD"),
TEST_NO_TAG("silence detection", silence_detection), TEST_ONE_TAG("silence detection 32000", silence_detection_32000, "VAD"),
#endif TEST_ONE_TAG("silence detection 16000", silence_detection_16000, "VAD"),
#endif TEST_ONE_TAG("silence detection 8000", silence_detection_8000, "VAD"),
TEST_NO_TAG("dtmfgen-tonedet", dtmfgen_tonedet), TEST_NO_TAG("dtmfgen-tonedet", dtmfgen_tonedet),
TEST_NO_TAG("dtmfgen-enc-dec-tonedet-bv16", dtmfgen_enc_dec_tonedet_bv16), TEST_NO_TAG("dtmfgen-enc-dec-tonedet-bv16", dtmfgen_enc_dec_tonedet_bv16),
TEST_NO_TAG("dtmfgen-enc-dec-tonedet-pcmu", dtmfgen_enc_dec_tonedet_pcmu), TEST_NO_TAG("dtmfgen-enc-dec-tonedet-pcmu", dtmfgen_enc_dec_tonedet_pcmu),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment