Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ else
ifeq ($(EXAMPLE),all)
modules += examples/basic_tf_stub
modules += examples/har
# modules += examples/nnse2
modules += examples/nnvad_tflm
modules += examples/uart
modules += examples/rpc_server

Expand All @@ -96,6 +96,9 @@ else
endif

ifeq ($(USB_PRESENT),1)
modules += examples/nnse_usb
modules += examples/nnse_usb_tflm

modules += examples/vision
modules += examples/ic
modules += examples/quaternion
Expand Down
2 changes: 2 additions & 0 deletions examples/nnse_usb/src/def_nn3_se.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "lstm.h"
#include "nn_speech.h"
extern const int16_t stft_win_coeff_w480_h160[];
extern const int16_t mfltrBank_coeff_nfilt72_fftsize512[];
PARAMS_NNSP params_nn3_se = {
.samplingRate = 16000,
.fftsize = 512,
Expand All @@ -15,6 +16,7 @@ PARAMS_NNSP params_nn3_se = {
.num_mfltrBank = 72,
.num_dnsmpl = 1,
.pt_stft_win_coeff = stft_win_coeff_w480_h160,
.p_melBanks = mfltrBank_coeff_nfilt72_fftsize512,
.start_bin = 1,
.is_dcrm = 1,
.pre_gain_q1 = 10 << 1, // q1: 10
Expand Down
48 changes: 48 additions & 0 deletions examples/nnse_usb_tflm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Speech-Enhancing Bluetooth Microphone Demo
This example shows how to use [Ambiq's NNSE](https://github.com/AmbiqAI/nnse) to create a 'Speech Enhancing BLE Microphone'. NNSE is Ambiq's AI speech de-noiser, a real-time AI model that removes background noise (such as the noise from a hair-dryer) from human speech.

```mermaid
sequenceDiagram
participant Microphone-on-EVB
participant NNSE
participant Opus-Codec
participant WebUSB
participant Chrome-on-PC
Microphone-on-EVB->>+NNSE: 10ms Raw Audio
NNSE->>+Opus-Codec: 10ms Clean Audio
Microphone-on-EVB->>+NNSE: 10ms Raw Audio
NNSE->>+Opus-Codec: 10ms Clean Audio
    Opus-Codec->>+WebUSB: 80 byte 20ms Opus Audio
WebUSB->>+Chrome-on-PC: 80 byte Opus Audio
```

The demo needs a WebUSB-enabled device (Apollo5, Apollo4 Plus KXR/KBR Blue), a microphone, and a PC or laptop running Chrome. It supports both PDM and AUDADC microphones, but is configured for PDM by default. To switch to AUDADC, uncomment this line in nnse_usb/src/nnse_usb.cc:

```c
// #define USE_AUDADC // Uncomment this to use the AUDADC instead of the PDM
```

Running the demo.

First, flash
```bash
$> make clean
$> make -j
$> make TARGET=nnse_usb_tflm deploy
```

With the firmware deployed, follow this link on a PC using the Chrome browser: [Audio WebBLE Demo](https://ambiqai.github.io/web-ble-dashboards/audio/)

The webpage should look something like this:

![image-20231106120630313](../../docs/images/audio-ble-dashboard.png)

Click on the 'Pair and start listening...' button to bring up a list of compatible bluetooth devices - one should be labeled 'Packet': that is the BLE device created by the NNSE firmware. Once paired, the web dashboard will play the audio (click the play button to allow the browser to actually play the stream) and show corresponding waveforms and spectrograms.

Once everything is working and audio is streaming, you can turn de-noising on and off by pressing Button0 on the EVB.

### Recommendations

The EVB's microphone will pick up the audio being played by the PC, often leading to a feedback loop. We recommend using headphones connected to the laptop to prevent this feedback.

For a more effective demonstration, use noise-cancelling headphones and introduce noise to the environment such as a hair dryer, traffic noise, or similar. The noise cancelling headphones should remove most of this ambient noise. When the EVB is in raw audio mode (as mentioned above, you can switch between modes using Button0 on the EVB), the EVB microphone's audio will be passed to the headphones, and the user will hear the ambient noise. When the EVB is in speech de-noising mode, the ambient noise will be removed but any speech will be cleaned up and passed through to the user's headphones.
14 changes: 14 additions & 0 deletions examples/nnse_usb_tflm/module.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Build-system module for the nnse_usb_tflm example.
# Registers the app's sources and output artifacts with the top-level make.
local_app_name := nnse_usb_tflm
# Collect every C/C++/assembly source under this example's src/ directory.
local_src := $(wildcard $(subdirectory)/src/*.c)
local_src += $(wildcard $(subdirectory)/src/*.cc)
local_src += $(wildcard $(subdirectory)/src/*.cpp)
local_src += $(wildcard $(subdirectory)/src/*.s)
# Per-example output directory under the global BINDIR.
local_bin := $(BINDIR)/$(subdirectory)

# Register outputs: the linked .axf image and the raw .bin for deployment.
bindirs += $(local_bin)
examples += $(local_bin)/$(local_app_name).axf
examples += $(local_bin)/$(local_app_name).bin
# # mains += $(wildcard $(subdirectory)/src/*.o)
# # mains += $(local_bin)/src/$(local_app_name).o
# mains += $(wildcard $(local_bin)/src/*.o)
# Emit the link rule for this example (defined by the top-level make-axf macro).
$(eval $(call make-axf, $(local_bin)/$(local_app_name), $(local_src)))
15 changes: 15 additions & 0 deletions examples/nnse_usb_tflm/src/AudioPipe_wrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/* C-callable wrapper around the NNSE audio pipeline (feature extraction +
 * TFLM inference + post-processing). Implemented in AudioPipe_wrapper_tflite.cc.
 * NOTE(review): the guard name uses a reserved identifier pattern (leading
 * double underscore) — consider AUDIO_PIPE_WRAPPER_H; kept for project
 * consistency. */
#ifndef __AUDIO_PIPE_WRAPPER_H__
#define __AUDIO_PIPE_WRAPPER_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
/* One-time setup of the feature front-end, DC-removal filter, and TFLM model.
 * Returns 0 on success. */
int AudioPipe_wrapper_init(void);
/* Reset streaming state (look-ahead buffer, feature extractor, IIR filter)
 * between sessions. Returns 0. */
int AudioPipe_wrapper_reset(void);
/* Process one audio hop: reads one frame from pcm_input (modified in place
 * by a pre-gain stage) and writes the enhanced frame to pcm_output.
 * Returns 0. */
int AudioPipe_wrapper_frameProc(
    int16_t *pcm_input,
    int16_t *pcm_output);
#ifdef __cplusplus
}
#endif
#endif
233 changes: 233 additions & 0 deletions examples/nnse_usb_tflm/src/AudioPipe_wrapper_tflite.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
#include "arm_mve.h"
#include "def_nnse_params.h"
#include "mut_model_metadata.h"
#include "mut_model_data.h"
#include "def_nn3_se.h"
#include "tflm_ns_model.h"
#include <stdint.h>
#include "AudioPipe_wrapper.h"

#include "feature_module.h"
#include "ns_ambiqsuite_harness.h"
#include "nn_speech.h"
#include "iir.h"
#include "third_party/ns_cmsis_nn/Include/arm_nnsupportfunctions.h"
// Model-init entry point generated alongside the model (defined elsewhere).
extern int tflm_validator_model_init(ns_model_state_t *ms);
// Feature class instance (STFT + mel-filterbank front-end state)
FeatureClass FEAT_INST;
// DC-removal IIR filter state
IIR_CLASS dcrm_inst;

// TFLM Config
static ns_model_state_t tflm;

// TF Tensor Arena
// Placement of the model and arena is configured at build time:
// PSRAM (pointer resolved at runtime), shared SRAM, or TCM.

#if (TFLM_MODEL_LOCATION == NS_AD_PSRAM)
unsigned char *mut_model;
#endif

#if (TFLM_ARENA_LOCATION == NS_AD_PSRAM)
static uint8_t *tensor_arena;
static constexpr int kTensorArenaSize = 1024 * 1024 * 10; // 10MB
#else
static constexpr int kTensorArenaSize = 1024 * TFLM_VALIDATOR_ARENA_SIZE;
// #ifdef AM_PART_APOLLO3
// // Apollo3 doesn't have AM_SHARED_RW
// alignas(16) static uint8_t tensor_arena[kTensorArenaSize];
// #else // not AM_PART_APOLLO3
#if (TFLM_ARENA_LOCATION == NS_AD_SRAM)
#ifdef keil6
// Align to 16 bytes (Keil 6 lacks alignas in this context)
AM_SHARED_RW __attribute__((aligned(16))) static uint8_t tensor_arena[kTensorArenaSize];
#else
AM_SHARED_RW alignas(16) static uint8_t tensor_arena[kTensorArenaSize];
#endif
#else
NS_PUT_IN_TCM alignas(16) static uint8_t tensor_arena[kTensorArenaSize];
#endif
// #endif
#endif

// Resource Variable Arena - always in TCM for now
static constexpr int kVarArenaSize = 4096;
// 4 * (TFLM_VALIDATOR_MAX_RESOURCE_VARIABLES + 1) * sizeof(tflite::MicroResourceVariables);
alignas(16) static uint8_t var_arena[kVarArenaSize];
// Validator Stuff

volatile int example_status = 0; // Prevent the compiler from optimizing out while loops

// STFT analysis window (480-sample window, 160-sample hop), defined elsewhere.
extern const int16_t stft_win_coeff_w480_h160[];


// Number of look-ahead frames buffered before synthesis.
int8_t num_lookeahead = NUM_LOOKAHEAD;
// Circular spectrum buffer: 514 int32 values (257 complex bins) per frame,
// sized for up to 4 look-ahead frames.
int32_t spec_buffer[514 * 4];
/**
 * One-time setup of the audio pipeline.
 *
 * Constructs the mel-spectrogram feature front-end, initializes the
 * DC-removal IIR filter, and initializes the TFLM model with its arena
 * configuration. Prints the model's tensor geometry for debugging.
 *
 * @return 0 on success; spins forever (setting example_status) if model
 *         initialization fails.
 */
int AudioPipe_wrapper_init(void)
{
    // Feature front-end: STFT windowing + mel filterbank + normalization.
    FeatureClass_construct(
        &FEAT_INST,
        (const int32_t*) feature_mean_se,
        (const int32_t*) feature_stdR_se,
        FEATURE_QBIT,
        params_nn3_se.num_mfltrBank,     // FEATURE_NUM_MFC
        params_nn3_se.winsize_stft,      // FEATURE_WINSIZE
        params_nn3_se.hopsize_stft,     // FEATURE_HOPSIZE
        params_nn3_se.fftsize,          // FEATURE_FFTSIZE
        params_nn3_se.pt_stft_win_coeff, // FEATURE_STFT_WIN_COEFF
        params_nn3_se.p_melBanks);      // FEATURE_MELBANKS

    IIR_CLASS_init(&dcrm_inst);

    // Initialize the model, get handle if successful
    tflm.runtime = TFLM;
    tflm.model_array = mut_model;
    tflm.arena = tensor_arena;
    tflm.arena_size = kTensorArenaSize;
    tflm.rv_arena = var_arena;
    tflm.rv_arena_size = kVarArenaSize;
    tflm.rv_count = TFLM_VALIDATOR_MAX_RESOURCE_VARIABLES;
    tflm.numInputTensors = 1;
    tflm.numOutputTensors = 1;

    int status = tflm_validator_model_init(&tflm); // model init with minimal defaults

    if (status == NS_STATUS_FAILURE) {
        while (1)
            example_status = NS_STATUS_INIT_FAILED; // hang
    }

    // Get data about input and output tensors
    int numInputs = tflm.numInputTensors;
    int numOutputs = tflm.numOutputTensors;

    ns_lp_printf("Model has %d inputs and %d outputs\n", numInputs, numOutputs);
    ns_lp_printf("Input tensor 0 has %d bytes\n", tflm.model_input[0]->bytes);
    ns_lp_printf("Output tensor 0 has %d bytes\n", tflm.model_output[0]->bytes);
    ns_lp_printf("input scale=%f\n", tflm.model_input[0]->params.scale);
    ns_lp_printf("input zero_point=%d\n", tflm.model_input[0]->params.zero_point);

    // Dump tensor shapes (the dead input_dim/output_dim accumulators that
    // used to be folded here were never read, so they were removed).
    ns_lp_printf("input dims=%d\n", tflm.model_input[0]->dims->size);
    for (int i = 0; i < tflm.model_input[0]->dims->size; i++) {
        ns_lp_printf("input dim[%d]=%d\n", i, tflm.model_input[0]->dims->data[i]);
    }
    for (int i = 0; i < tflm.model_output[0]->dims->size; i++) {
        ns_lp_printf("output dim[%d]=%d\n", i, tflm.model_output[0]->dims->data[i]);
    }


    ns_lp_printf("Model initialized\n");
    return 0;
}

int AudioPipe_wrapper_reset(void)
{
int32_t *pt_spec_buffer = spec_buffer;
if (num_lookeahead > 0)
{
for (int i = 0; i < 514 * num_lookeahead; i++)
{
pt_spec_buffer[i] = 0;
}
}
FeatureClass_setDefault(&FEAT_INST);
IIR_CLASS_reset(&dcrm_inst);
return 0;
}

/**
 * Process one audio hop through the speech-enhancement pipeline:
 * pre-gain -> DC-removal IIR -> mel-spectrogram features -> look-ahead
 * buffering -> quantized TFLM inference -> dequantization -> spectral
 * post-processing/synthesis into pcm_output.
 *
 * @param pcm_input  one hop of raw PCM; MODIFIED IN PLACE by the pre-gain.
 * @param pcm_output one hop of enhanced PCM written by se_post_proc.
 * @return 0 on success; spins forever if model Invoke() fails.
 */
int AudioPipe_wrapper_frameProc(
    int16_t *pcm_input,
    int16_t *pcm_output)
{
    /* feature extraction
        1. iir for dc remove
        2. melspectrogram
    */
    // Current frame's spectrum produced by the STFT inside FEAT_INST.
    int32_t *pt_spec = FEAT_INST.state_stftModule.spec;
    int32_t *pt_spec_buffer = spec_buffer;
    int32_t tmp_spec[514]; // scratch: one frame = 514 int32 (257 complex bins)

    // Persistent scratch; assumes hopsize and NN_DIM_OUT fit in 300 — TODO confirm.
    static int16_t tmp_16s[300];
    // Converts Q(FEATURE_QBIT) fixed-point features back to float.
    static float scalar_norm = 1.0 / (float) (1 << FEATURE_QBIT);

    // Apply pre-gain in q1 fixed point (>> 1 removes the q1 scaling),
    // saturating to int16 range. Note: mutates the caller's buffer.
    int32_t gain= (int32_t) params_nn3_se.pre_gain_q1;
    for (int i = 0; i < params_nn3_se.hopsize_stft; i++)
    {
        int32_t tmp = (int32_t) pcm_input[i] * gain;
        pcm_input[i] = (int16_t) MIN(MAX((tmp >> 1), -32768), 32767);
    }



    // DC removal into tmp_16s, then STFT + mel features from the cleaned hop.
    IIR_CLASS_exec(&dcrm_inst, tmp_16s, pcm_input, params_nn3_se.hopsize_stft);
    FeatureClass_execute(&FEAT_INST, tmp_16s);

    // move pt_spec to pt_spec_buffer
    // Look-ahead rotation: the NN output gain is applied to the spectrum
    // from num_lookeahead frames ago. Copy order matters:
    //   1) save current spectrum, 2) shift buffer left one frame,
    //   3) append current spectrum, 4) expose the OLDEST frame (shifted out
    //      in step 2, still at the head... NOTE(review): step 4 restores the
    //      saved CURRENT spectrum into pt_spec — verify intended delay logic.
    if (num_lookeahead > 0)
    {
        arm_memcpy_s8(
            (int8_t*) tmp_spec,
            (int8_t*) pt_spec,
            514 * sizeof(int32_t));

        arm_memcpy_s8(
            (int8_t*) pt_spec_buffer,
            (int8_t*) (pt_spec_buffer + 514),
            514 * (num_lookeahead-1) * sizeof(int32_t));

        arm_memcpy_s8(
            (int8_t*) (pt_spec_buffer + 514 * (num_lookeahead-1)),
            (int8_t*) pt_spec,
            514 * sizeof(int32_t));

        arm_memcpy_s8(
            (int8_t*) pt_spec,
            (int8_t*) tmp_spec,
            514 * sizeof(int32_t));
    }
    // Most recent normalized feature vector in the context window.
    int16_t *ptfeat = FEAT_INST.normFeatContext + params_nn3_se.num_mfltrBank * (FEATURE_CONTEXT-1);

    float32_t input_scale = tflm.model_input[0]->params.scale;
    int input_zero_point = tflm.model_input[0]->params.zero_point;

    // Quantize features into the model's int16 input tensor:
    // fixed-point -> float -> (val/scale + zero_point).
    for (int i =0; i < params_nn3_se.num_mfltrBank; i++)
    {
        float32_t val = ((float32_t) ptfeat[i] ) * scalar_norm;
        int16_t input = (int16_t) ((float32_t) val / (float32_t) input_scale + (float32_t) input_zero_point);
        tflm.model_input[0]->data.i16[i] = input;
    }

    TfLiteStatus invoke_status = tflm.interpreter->Invoke();
    if (invoke_status != kTfLiteOk) {
        while (1)
        {
            example_status = NS_STATUS_FAILURE; // invoke failed, so hang
        }
    }
    float32_t output_scale = tflm.model_output[0]->params.scale;
    int output_zero_point = tflm.model_output[0]->params.zero_point;

    // Dequantize the model output and rescale to Q15, saturating to int16.
    for (int i = 0; i < NN_DIM_OUT; i++) {
        float32_t out;
        out = (float32_t) (tflm.model_output[0]->data.i16[i] - output_zero_point);
        out = out * output_scale;
        int32_t out_32s = (int32_t)(out * 32768.0f); // scale to 16-bit range
        // ns_lp_printf("%f ", out);
        tmp_16s[i] = (int16_t) MAX(MIN(out_32s, 32767), -32768); // clamp to 16-bit range
    }

    // ns_lp_printf("\n");
    // // get the tf mask
    // Apply the predicted mask/gains to the buffered spectrum and
    // synthesize the enhanced time-domain hop.
    se_post_proc(
        &FEAT_INST,
        tmp_16s,
        pcm_output,
        0,
        NN_DIM_OUT,
        params_nn3_se.fftsize);
    return 0;
}

Loading