Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ else
ifeq ($(EXAMPLE),all)
modules += examples/basic_tf_stub
modules += examples/har
# modules += examples/nnse2
modules += examples/nnvad_tflm
modules += examples/uart
modules += examples/rpc_server

Expand All @@ -96,6 +96,9 @@ else
endif

ifeq ($(USB_PRESENT),1)
modules += examples/nnse_usb
modules += examples/nnse_usb_tflm

modules += examples/vision
modules += examples/ic
modules += examples/quaternion
Expand Down
2 changes: 2 additions & 0 deletions examples/nnse_usb/src/def_nn3_se.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "lstm.h"
#include "nn_speech.h"
extern const int16_t stft_win_coeff_w480_h160[];
extern const int16_t mfltrBank_coeff_nfilt72_fftsize512[];
PARAMS_NNSP params_nn3_se = {
.samplingRate = 16000,
.fftsize = 512,
Expand All @@ -15,6 +16,7 @@ PARAMS_NNSP params_nn3_se = {
.num_mfltrBank = 72,
.num_dnsmpl = 1,
.pt_stft_win_coeff = stft_win_coeff_w480_h160,
.p_melBanks = mfltrBank_coeff_nfilt72_fftsize512,
.start_bin = 1,
.is_dcrm = 1,
.pre_gain_q1 = 10 << 1, // q1: 10
Expand Down
48 changes: 48 additions & 0 deletions examples/nnse_usb_tflm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Speech-Enhancing Bluetooth Microphone Demo
This example shows how to use [Ambiq's NNSE](https://github.com/AmbiqAI/nnse) to create a 'Speech Enhancing BLE Microphone'. NNSE is Ambiq's AI speech de-noiser, a real-time AI model that removes background noise (such as the noise from a hair-dryer) from human speech.

```mermaid
sequenceDiagram
participant Microphone-on-EVB
participant NNSE
participant Opus-Codec
participant WebUSB
participant Chrome-on-PC
Microphone-on-EVB->>+NNSE: 10ms Raw Audio
NNSE->>+Opus-Codec: 10ms Clean Audio
Microphone-on-EVB->>+NNSE: 10ms Raw Audio
NNSE->>+Opus-Codec: 10ms Clean Audio
    Opus-Codec->>+WebUSB: 80 byte 20ms Opus Audio
WebUSB->>+Chrome-on-PC: 80 byte Opus Audio
```

The demo needs a WebUSB-enabled device (Apollo5, Apollo4 Plus KXR/KBR Blue), a microphone, and a PC or laptop running Chrome. It supports both PDM and AUDADC microphones, but is configured for PDM by default. To switch to AUDADC, uncomment this line in nnse_usb/src/nnse_usb.cc:

```c
// #define USE_AUDADC // Uncomment this to use the AUDADC instead of the PDM
```

Running the demo.

First, flash
```bash
$> make clean
$> make -j
$> make TARGET=nnse_usb_tflm deploy
```

With the firmware deployed, follow this link on a PC using the Chrome browser: [Audio WebBLE Demo](https://ambiqai.github.io/web-ble-dashboards/audio/)

The webpage should look something like this:

![image-20231106120630313](../../docs/images/audio-ble-dashboard.png)

Click on the 'Pair and start listening...' button to bring up a list of compatible bluetooth devices - one should be labeled 'Packet': that is the BLE device created by the NNSE firmware. Once paired, the web dashboard will play the audio (click the play button to allow the browser to actually play the stream) and show corresponding waveforms and spectrograms.

Once everything is working and audio is streaming, you can turn de-noising on and off by pressing Button0 on the EVB.

### Recommendations

The EVB's microphone will pick up the audio being played by the PC, often leading to a feedback loop. We recommend using headphones connected to the laptop to prevent this feedback.

For a more effective demonstration, use noise-cancelling headphones and introduce noise to the environment such as a hair dryer, traffic noise, or similar. The noise cancelling headphones should remove most of this ambient noise. When the EVB is in raw audio mode (as mentioned above, you can switch between modes using Button0 on the EVB), the EVB microphone's audio will be passed to the headphones, and the user will hear the ambient noise. When the EVB is in speech de-noising mode, the ambient noise will be removed but any speech will be cleaned up and passed through to the user's headphones.
14 changes: 14 additions & 0 deletions examples/nnse_usb_tflm/module.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Build-system module for the nnse_usb_tflm example.
# Registers the app's sources and output artifacts with the top-level make.
local_app_name := nnse_usb_tflm
# Collect every C/C++/assembly source under this example's src/ directory.
local_src := $(wildcard $(subdirectory)/src/*.c)
local_src += $(wildcard $(subdirectory)/src/*.cc)
local_src += $(wildcard $(subdirectory)/src/*.cpp)
local_src += $(wildcard $(subdirectory)/src/*.s)
# Per-example output directory under the global BINDIR.
local_bin := $(BINDIR)/$(subdirectory)

# Register outputs: the linked .axf image and the raw .bin for deployment.
bindirs += $(local_bin)
examples += $(local_bin)/$(local_app_name).axf
examples += $(local_bin)/$(local_app_name).bin
# # mains += $(wildcard $(subdirectory)/src/*.o)
# # mains += $(local_bin)/src/$(local_app_name).o
# mains += $(wildcard $(local_bin)/src/*.o)
# Emit the link rule for this example (defined by the top-level make-axf macro).
$(eval $(call make-axf, $(local_bin)/$(local_app_name), $(local_src)))
15 changes: 15 additions & 0 deletions examples/nnse_usb_tflm/src/AudioPipe_wrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/* C-callable wrapper around the NNSE audio pipeline (feature extraction +
 * TFLM inference + post-processing). Implemented in AudioPipe_wrapper_tflite.cc.
 * NOTE(review): the guard name uses a reserved identifier pattern (leading
 * double underscore) — consider AUDIO_PIPE_WRAPPER_H; kept for project
 * consistency. */
#ifndef __AUDIO_PIPE_WRAPPER_H__
#define __AUDIO_PIPE_WRAPPER_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
/* One-time setup of the feature front-end, DC-removal filter, and TFLM model.
 * Returns 0 on success. */
int AudioPipe_wrapper_init(void);
/* Reset streaming state (look-ahead buffer, feature extractor, IIR filter)
 * between sessions. Returns 0. */
int AudioPipe_wrapper_reset(void);
/* Process one audio hop: reads one frame from pcm_input (modified in place
 * by a pre-gain stage) and writes the enhanced frame to pcm_output.
 * Returns 0. */
int AudioPipe_wrapper_frameProc(
    int16_t *pcm_input,
    int16_t *pcm_output);
#ifdef __cplusplus
}
#endif
#endif
233 changes: 233 additions & 0 deletions examples/nnse_usb_tflm/src/AudioPipe_wrapper_tflite.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
#include "arm_mve.h"
#include "def_nnse_params.h"
#include "mut_model_metadata.h"
#include "mut_model_data.h"
#include "def_nn3_se.h"
#include "tflm_ns_model.h"
#include <stdint.h>
#include "AudioPipe_wrapper.h"

#include "feature_module.h"
#include "ns_ambiqsuite_harness.h"
#include "nn_speech.h"
#include "iir.h"
#include "third_party/ns_cmsis_nn/Include/arm_nnsupportfunctions.h"
// Model-init entry point generated alongside the model (defined elsewhere).
extern int tflm_validator_model_init(ns_model_state_t *ms);
// Feature class instance (STFT + mel-filterbank front-end state)
FeatureClass FEAT_INST;
// DC-removal IIR filter state
IIR_CLASS dcrm_inst;

// TFLM Config
static ns_model_state_t tflm;

// TF Tensor Arena
// Placement of the model and arena is configured at build time:
// PSRAM (pointer resolved at runtime), shared SRAM, or TCM.

#if (TFLM_MODEL_LOCATION == NS_AD_PSRAM)
unsigned char *mut_model;
#endif

#if (TFLM_ARENA_LOCATION == NS_AD_PSRAM)
static uint8_t *tensor_arena;
static constexpr int kTensorArenaSize = 1024 * 1024 * 10; // 10MB
#else
static constexpr int kTensorArenaSize = 1024 * TFLM_VALIDATOR_ARENA_SIZE;
// #ifdef AM_PART_APOLLO3
// // Apollo3 doesn't have AM_SHARED_RW
// alignas(16) static uint8_t tensor_arena[kTensorArenaSize];
// #else // not AM_PART_APOLLO3
#if (TFLM_ARENA_LOCATION == NS_AD_SRAM)
#ifdef keil6
// Align to 16 bytes (Keil 6 lacks alignas in this context)
AM_SHARED_RW __attribute__((aligned(16))) static uint8_t tensor_arena[kTensorArenaSize];
#else
AM_SHARED_RW alignas(16) static uint8_t tensor_arena[kTensorArenaSize];
#endif
#else
NS_PUT_IN_TCM alignas(16) static uint8_t tensor_arena[kTensorArenaSize];
#endif
// #endif
#endif

// Resource Variable Arena - always in TCM for now
static constexpr int kVarArenaSize = 4096;
// 4 * (TFLM_VALIDATOR_MAX_RESOURCE_VARIABLES + 1) * sizeof(tflite::MicroResourceVariables);
alignas(16) static uint8_t var_arena[kVarArenaSize];
// Validator Stuff

volatile int example_status = 0; // Prevent the compiler from optimizing out while loops

// STFT analysis window (480-sample window, 160-sample hop), defined elsewhere.
extern const int16_t stft_win_coeff_w480_h160[];


// Number of look-ahead frames buffered before synthesis.
int8_t num_lookeahead = NUM_LOOKAHEAD;
// Circular spectrum buffer: 514 int32 values (257 complex bins) per frame,
// sized for up to 4 look-ahead frames.
int32_t spec_buffer[514 * 4];
/**
 * One-time setup of the audio pipeline.
 *
 * Constructs the mel-spectrogram feature front-end, initializes the
 * DC-removal IIR filter, and initializes the TFLM model with its arena
 * configuration. Prints the model's tensor geometry for debugging.
 *
 * @return 0 on success; spins forever (setting example_status) if model
 *         initialization fails.
 */
int AudioPipe_wrapper_init(void)
{
    // Feature front-end: STFT windowing + mel filterbank + normalization.
    FeatureClass_construct(
        &FEAT_INST,
        (const int32_t*) feature_mean_se,
        (const int32_t*) feature_stdR_se,
        FEATURE_QBIT,
        params_nn3_se.num_mfltrBank,     // FEATURE_NUM_MFC
        params_nn3_se.winsize_stft,      // FEATURE_WINSIZE
        params_nn3_se.hopsize_stft,     // FEATURE_HOPSIZE
        params_nn3_se.fftsize,          // FEATURE_FFTSIZE
        params_nn3_se.pt_stft_win_coeff, // FEATURE_STFT_WIN_COEFF
        params_nn3_se.p_melBanks);      // FEATURE_MELBANKS

    IIR_CLASS_init(&dcrm_inst);

    // Initialize the model, get handle if successful
    tflm.runtime = TFLM;
    tflm.model_array = mut_model;
    tflm.arena = tensor_arena;
    tflm.arena_size = kTensorArenaSize;
    tflm.rv_arena = var_arena;
    tflm.rv_arena_size = kVarArenaSize;
    tflm.rv_count = TFLM_VALIDATOR_MAX_RESOURCE_VARIABLES;
    tflm.numInputTensors = 1;
    tflm.numOutputTensors = 1;

    int status = tflm_validator_model_init(&tflm); // model init with minimal defaults

    if (status == NS_STATUS_FAILURE) {
        while (1)
            example_status = NS_STATUS_INIT_FAILED; // hang
    }

    // Get data about input and output tensors
    int numInputs = tflm.numInputTensors;
    int numOutputs = tflm.numOutputTensors;

    ns_lp_printf("Model has %d inputs and %d outputs\n", numInputs, numOutputs);
    ns_lp_printf("Input tensor 0 has %d bytes\n", tflm.model_input[0]->bytes);
    ns_lp_printf("Output tensor 0 has %d bytes\n", tflm.model_output[0]->bytes);
    ns_lp_printf("input scale=%f\n", tflm.model_input[0]->params.scale);
    ns_lp_printf("input zero_point=%d\n", tflm.model_input[0]->params.zero_point);

    // Dump tensor shapes (the dead input_dim/output_dim accumulators that
    // used to be folded here were never read, so they were removed).
    ns_lp_printf("input dims=%d\n", tflm.model_input[0]->dims->size);
    for (int i = 0; i < tflm.model_input[0]->dims->size; i++) {
        ns_lp_printf("input dim[%d]=%d\n", i, tflm.model_input[0]->dims->data[i]);
    }
    for (int i = 0; i < tflm.model_output[0]->dims->size; i++) {
        ns_lp_printf("output dim[%d]=%d\n", i, tflm.model_output[0]->dims->data[i]);
    }


    ns_lp_printf("Model initialized\n");
    return 0;
}

int AudioPipe_wrapper_reset(void)
{
int32_t *pt_spec_buffer = spec_buffer;
if (num_lookeahead > 0)
{
for (int i = 0; i < 514 * num_lookeahead; i++)
{
pt_spec_buffer[i] = 0;
}
}
FeatureClass_setDefault(&FEAT_INST);
IIR_CLASS_reset(&dcrm_inst);
return 0;
}

/**
 * Process one audio hop through the speech-enhancement pipeline:
 * pre-gain -> DC-removal IIR -> mel-spectrogram features -> look-ahead
 * buffering -> quantized TFLM inference -> dequantization -> spectral
 * post-processing/synthesis into pcm_output.
 *
 * @param pcm_input  one hop of raw PCM; MODIFIED IN PLACE by the pre-gain.
 * @param pcm_output one hop of enhanced PCM written by se_post_proc.
 * @return 0 on success; spins forever if model Invoke() fails.
 */
int AudioPipe_wrapper_frameProc(
    int16_t *pcm_input,
    int16_t *pcm_output)
{
    /* feature extraction
        1. iir for dc remove
        2. melspectrogram
    */
    // Current frame's spectrum produced by the STFT inside FEAT_INST.
    int32_t *pt_spec = FEAT_INST.state_stftModule.spec;
    int32_t *pt_spec_buffer = spec_buffer;
    int32_t tmp_spec[514]; // scratch: one frame = 514 int32 (257 complex bins)

    // Persistent scratch; assumes hopsize and NN_DIM_OUT fit in 300 — TODO confirm.
    static int16_t tmp_16s[300];
    // Converts Q(FEATURE_QBIT) fixed-point features back to float.
    static float scalar_norm = 1.0 / (float) (1 << FEATURE_QBIT);

    // Apply pre-gain in q1 fixed point (>> 1 removes the q1 scaling),
    // saturating to int16 range. Note: mutates the caller's buffer.
    int32_t gain= (int32_t) params_nn3_se.pre_gain_q1;
    for (int i = 0; i < params_nn3_se.hopsize_stft; i++)
    {
        int32_t tmp = (int32_t) pcm_input[i] * gain;
        pcm_input[i] = (int16_t) MIN(MAX((tmp >> 1), -32768), 32767);
    }



    // DC removal into tmp_16s, then STFT + mel features from the cleaned hop.
    IIR_CLASS_exec(&dcrm_inst, tmp_16s, pcm_input, params_nn3_se.hopsize_stft);
    FeatureClass_execute(&FEAT_INST, tmp_16s);

    // move pt_spec to pt_spec_buffer
    // Look-ahead rotation: the NN output gain is applied to the spectrum
    // from num_lookeahead frames ago. Copy order matters:
    //   1) save current spectrum, 2) shift buffer left one frame,
    //   3) append current spectrum, 4) expose the OLDEST frame (shifted out
    //      in step 2, still at the head... NOTE(review): step 4 restores the
    //      saved CURRENT spectrum into pt_spec — verify intended delay logic.
    if (num_lookeahead > 0)
    {
        arm_memcpy_s8(
            (int8_t*) tmp_spec,
            (int8_t*) pt_spec,
            514 * sizeof(int32_t));

        arm_memcpy_s8(
            (int8_t*) pt_spec_buffer,
            (int8_t*) (pt_spec_buffer + 514),
            514 * (num_lookeahead-1) * sizeof(int32_t));

        arm_memcpy_s8(
            (int8_t*) (pt_spec_buffer + 514 * (num_lookeahead-1)),
            (int8_t*) pt_spec,
            514 * sizeof(int32_t));

        arm_memcpy_s8(
            (int8_t*) pt_spec,
            (int8_t*) tmp_spec,
            514 * sizeof(int32_t));
    }
    // Most recent normalized feature vector in the context window.
    int16_t *ptfeat = FEAT_INST.normFeatContext + params_nn3_se.num_mfltrBank * (FEATURE_CONTEXT-1);

    float32_t input_scale = tflm.model_input[0]->params.scale;
    int input_zero_point = tflm.model_input[0]->params.zero_point;

    // Quantize features into the model's int16 input tensor:
    // fixed-point -> float -> (val/scale + zero_point).
    for (int i =0; i < params_nn3_se.num_mfltrBank; i++)
    {
        float32_t val = ((float32_t) ptfeat[i] ) * scalar_norm;
        int16_t input = (int16_t) ((float32_t) val / (float32_t) input_scale + (float32_t) input_zero_point);
        tflm.model_input[0]->data.i16[i] = input;
    }

    TfLiteStatus invoke_status = tflm.interpreter->Invoke();
    if (invoke_status != kTfLiteOk) {
        while (1)
        {
            example_status = NS_STATUS_FAILURE; // invoke failed, so hang
        }
    }
    float32_t output_scale = tflm.model_output[0]->params.scale;
    int output_zero_point = tflm.model_output[0]->params.zero_point;

    // Dequantize the model output and rescale to Q15, saturating to int16.
    for (int i = 0; i < NN_DIM_OUT; i++) {
        float32_t out;
        out = (float32_t) (tflm.model_output[0]->data.i16[i] - output_zero_point);
        out = out * output_scale;
        int32_t out_32s = (int32_t)(out * 32768.0f); // scale to 16-bit range
        // ns_lp_printf("%f ", out);
        tmp_16s[i] = (int16_t) MAX(MIN(out_32s, 32767), -32768); // clamp to 16-bit range
    }

    // ns_lp_printf("\n");
    // // get the tf mask
    // Apply the predicted mask/gains to the buffered spectrum and
    // synthesize the enhanced time-domain hop.
    se_post_proc(
        &FEAT_INST,
        tmp_16s,
        pcm_output,
        0,
        NN_DIM_OUT,
        params_nn3_se.fftsize);
    return 0;
}

Loading